From 290fc7a9df7119f21cbd31ce198daad65dcf556e Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Tue, 28 Apr 2026 19:20:09 +0200 Subject: [PATCH 01/34] xpath: implement XPath 1.0 evaluator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ports the capybara-lightpanda XPath 1.0 polyfill into Lightpanda. Exposes the WHATWG Document.evaluate / XPathResult / XPathEvaluator / XPathExpression surface and routes CDP DOM.performSearch XPath queries through the new evaluator. The capybara-lightpanda gem can drop its ~700-line JS polyfill in the next release. New module src/browser/xpath/ (Tokenizer, Parser, Ast, Evaluator, Functions, Result). New webapi types XPathResult, XPathExpression, XPathEvaluator. Coverage and stubs match the polyfill 1:1 — see capybara-lightpanda/XPATH_COMPLIANCE.md for the full spec. Tests: 91-case conformance + result-API + evaluator-API + CDP fixtures, plus the engine's Zig unit suite (601/601 pass). --- src/browser/js/bridge.zig | 3 + .../tests/cdp/perform_search_xpath.html | 8 + .../tests/xpath/document_evaluate.html | 123 +++ .../tests/xpath/xpath_conformance.html | 202 ++++ src/browser/tests/xpath/xpath_evaluator.html | 103 ++ src/browser/tests/xpath/xpath_result.html | 193 ++++ src/browser/webapi/Document.zig | 43 + src/browser/webapi/XPathEvaluator.zig | 97 ++ src/browser/webapi/XPathExpression.zig | 100 ++ src/browser/webapi/XPathResult.zig | 277 ++++++ src/browser/xpath/Ast.zig | 134 +++ src/browser/xpath/Evaluator.zig | 725 ++++++++++++++ src/browser/xpath/Functions.zig | 630 ++++++++++++ src/browser/xpath/Parser.zig | 923 ++++++++++++++++++ src/browser/xpath/Result.zig | 200 ++++ src/browser/xpath/Tokenizer.zig | 466 +++++++++ src/cdp/domains/dom.zig | 102 +- 17 files changed, 4322 insertions(+), 7 deletions(-) create mode 100644 src/browser/tests/cdp/perform_search_xpath.html create mode 100644 src/browser/tests/xpath/document_evaluate.html create mode 100644 
src/browser/tests/xpath/xpath_conformance.html create mode 100644 src/browser/tests/xpath/xpath_evaluator.html create mode 100644 src/browser/tests/xpath/xpath_result.html create mode 100644 src/browser/webapi/XPathEvaluator.zig create mode 100644 src/browser/webapi/XPathExpression.zig create mode 100644 src/browser/webapi/XPathResult.zig create mode 100644 src/browser/xpath/Ast.zig create mode 100644 src/browser/xpath/Evaluator.zig create mode 100644 src/browser/xpath/Functions.zig create mode 100644 src/browser/xpath/Parser.zig create mode 100644 src/browser/xpath/Result.zig create mode 100644 src/browser/xpath/Tokenizer.zig diff --git a/src/browser/js/bridge.zig b/src/browser/js/bridge.zig index 366f83af..9761540b 100644 --- a/src/browser/js/bridge.zig +++ b/src/browser/js/bridge.zig @@ -935,6 +935,9 @@ pub const PageJsApis = flattenTypes(&.{ @import("../webapi/CryptoKey.zig"), @import("../webapi/Selection.zig"), @import("../webapi/ImageData.zig"), + @import("../webapi/XPathResult.zig"), + @import("../webapi/XPathExpression.zig"), + @import("../webapi/XPathEvaluator.zig"), }); // APIs available on Worker context globals (constructors like URL, Headers, etc.) diff --git a/src/browser/tests/cdp/perform_search_xpath.html b/src/browser/tests/cdp/perform_search_xpath.html new file mode 100644 index 00000000..e30ca1c1 --- /dev/null +++ b/src/browser/tests/cdp/perform_search_xpath.html @@ -0,0 +1,8 @@ + + +
+

1

+

2

+
+

3

+ diff --git a/src/browser/tests/xpath/document_evaluate.html b/src/browser/tests/xpath/document_evaluate.html new file mode 100644 index 00000000..2c4fdc58 --- /dev/null +++ b/src/browser/tests/xpath/document_evaluate.html @@ -0,0 +1,123 @@ + + + +

Hello

+
+

First

+

Second

+

Third

+
+ x + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/browser/tests/xpath/xpath_conformance.html b/src/browser/tests/xpath/xpath_conformance.html new file mode 100644 index 00000000..f200ecbb --- /dev/null +++ b/src/browser/tests/xpath/xpath_conformance.html @@ -0,0 +1,202 @@ + + + + XPath conformance + + + +

Hello World

+

First paragraph with emphasis.

+

Second paragraph.

+ + + + + + + + +
NameAge
Alice30
Bob25
Carol40
+
+
AB
+
Click me
+
Other link
+
+
+ + + + + + +
+ +
+
+

One

+

Two

+

Three

+
+ + + + diff --git a/src/browser/tests/xpath/xpath_evaluator.html b/src/browser/tests/xpath/xpath_evaluator.html new file mode 100644 index 00000000..6cb6a886 --- /dev/null +++ b/src/browser/tests/xpath/xpath_evaluator.html @@ -0,0 +1,103 @@ + + + +

Hello

+

One

+

Two

+ + + + + + + + + + + + + + + + + + + diff --git a/src/browser/tests/xpath/xpath_result.html b/src/browser/tests/xpath/xpath_result.html new file mode 100644 index 00000000..f7674e7b --- /dev/null +++ b/src/browser/tests/xpath/xpath_result.html @@ -0,0 +1,193 @@ + + + +

Hello

+

One

+

Two

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 747e08c2..095fa48f 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -35,6 +35,8 @@ const DOMImplementation = @import("DOMImplementation.zig"); const StyleSheetList = @import("css/StyleSheetList.zig"); const FontFaceSet = @import("css/FontFaceSet.zig"); const Selection = @import("Selection.zig"); +const XPathResult = @import("XPathResult.zig"); +const XPathExpression = @import("XPathExpression.zig"); pub const XMLDocument = @import("XMLDocument.zig"); pub const HTMLDocument = @import("HTMLDocument.zig"); @@ -412,6 +414,40 @@ pub fn createNodeIterator(_: *const Document, root: *Node, what_to_show: ?js.Val return DOMNodeIterator.init(root, try whatToShow(what_to_show), filter, frame); } +pub fn evaluate( + self: *Document, + expression: []const u8, + context_node: ?*Node, + resolver: ?js.Function, + result_type: u16, + result: ?*XPathResult, + frame: *Frame, +) !*XPathResult { + // resolver/result are no-ops in HTML mode (decision #2). 
+ _ = resolver; + _ = result; + return XPathResult.fromExpression( + expression, + context_node orelse self.asNode(), + result_type, + frame, + ); +} + +pub fn createExpression( + _: *const Document, + expression: []const u8, + resolver: ?js.Function, + frame: *Frame, +) !*XPathExpression { + _ = resolver; + return XPathExpression.init(expression, frame); +} + +pub fn createNSResolver(_: *const Document, node: *Node) ?*Node { + return node; +} + fn whatToShow(value_: ?js.Value) !u32 { const value = value_ orelse return 4294967295; // show all when undefined if (value.isUndefined()) { @@ -1053,6 +1089,9 @@ pub const JsApi = struct { pub const createEvent = bridge.function(Document.createEvent, .{ .dom_exception = true }); pub const createTreeWalker = bridge.function(Document.createTreeWalker, .{}); pub const createNodeIterator = bridge.function(Document.createNodeIterator, .{}); + pub const evaluate = bridge.function(Document.evaluate, .{ .dom_exception = true }); + pub const createExpression = bridge.function(Document.createExpression, .{ .dom_exception = true }); + pub const createNSResolver = bridge.function(Document.createNSResolver, .{}); pub const getElementById = bridge.function(_getElementById, .{}); fn _getElementById(self: *Document, value_: ?js.Value, frame: *Frame) !?*Element { const value = value_ orelse return null; @@ -1113,3 +1152,7 @@ const testing = @import("../../testing.zig"); test "WebApi: Document" { try testing.htmlRunner("document", .{}); } + +test "WebApi: Document.evaluate" { + try testing.htmlRunner("xpath/document_evaluate.html", .{}); +} diff --git a/src/browser/webapi/XPathEvaluator.zig b/src/browser/webapi/XPathEvaluator.zig new file mode 100644 index 00000000..ec651de0 --- /dev/null +++ b/src/browser/webapi/XPathEvaluator.zig @@ -0,0 +1,97 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms 
of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//! WHATWG `XPathEvaluator` — a stateless factory for XPath evaluation. +//! Mirrors `Document.evaluate` / `Document.createExpression` / +//! `Document.createNSResolver` so an explicit +//! `new XPathEvaluator()` instance can be used in place of the +//! document. + +const std = @import("std"); + +const js = @import("../js/js.zig"); +const Frame = @import("../Frame.zig"); + +const Node = @import("Node.zig"); +const XPathResult = @import("XPathResult.zig"); +const XPathExpression = @import("XPathExpression.zig"); + +const XPathEvaluator = @This(); + +// Padding to avoid zero-size struct identity_map collisions (matches +// the convention in ResizeObserver.zig). +_pad: bool = false, + +pub fn init() XPathEvaluator { + return .{}; +} + +pub fn evaluate( + _: *const XPathEvaluator, + expression: []const u8, + context_node: *Node, + resolver: ?js.Function, + requested_type: u16, + result: ?*XPathResult, + frame: *Frame, +) !*XPathResult { + // Namespace resolver is accepted-and-ignored (HTML mode — decision #2). + // Result reuse is also a no-op; XPathResult.fromExpression always + // allocates a fresh instance. 
+ _ = resolver; + _ = result; + return XPathResult.fromExpression(expression, context_node, requested_type, frame); +} + +pub fn createExpression( + _: *const XPathEvaluator, + expression: []const u8, + resolver: ?js.Function, + frame: *Frame, +) !*XPathExpression { + _ = resolver; + return XPathExpression.init(expression, frame); +} + +pub fn createNSResolver(_: *const XPathEvaluator, node: *Node) ?*Node { + // HTML-mode passthrough — the WHATWG IDL accepts a Node and returns + // an `XPathNSResolver`, but in practice the input node is reused. + return node; +} + +pub const JsApi = struct { + pub const bridge = js.Bridge(XPathEvaluator); + + pub const Meta = struct { + pub const name = "XPathEvaluator"; + pub const prototype_chain = bridge.prototypeChain(); + pub var class_id: bridge.ClassId = undefined; + pub const empty_with_no_proto = true; + }; + + pub const constructor = bridge.constructor(XPathEvaluator.init, .{}); + pub const evaluate = bridge.function(XPathEvaluator.evaluate, .{ .dom_exception = true }); + pub const createExpression = bridge.function(XPathEvaluator.createExpression, .{ .dom_exception = true }); + pub const createNSResolver = bridge.function(XPathEvaluator.createNSResolver, .{}); +}; + +const testing = @import("../../testing.zig"); + +test "WebApi: XPathEvaluator + XPathExpression" { + try testing.htmlRunner("xpath/xpath_evaluator.html", .{}); +} diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig new file mode 100644 index 00000000..6dba00fb --- /dev/null +++ b/src/browser/webapi/XPathExpression.zig @@ -0,0 +1,100 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//! WHATWG `XPathExpression` — a parsed XPath expression cached for +//! repeated evaluation. The parsed AST lives in this object's per- +//! instance arena (long-lived); each `evaluate()` call gets a fresh +//! arena for its own result data so multiple evaluations don't grow +//! the AST arena. + +const std = @import("std"); +const lp = @import("lightpanda"); + +const js = @import("../js/js.zig"); +const Page = @import("../Page.zig"); +const Frame = @import("../Frame.zig"); + +const Node = @import("Node.zig"); +const XPathResult = @import("XPathResult.zig"); + +const xpath = struct { + const Ast = @import("../xpath/Ast.zig"); + const Parser = @import("../xpath/Parser.zig"); + const Evaluator = @import("../xpath/Evaluator.zig"); +}; + +const Allocator = std.mem.Allocator; + +const XPathExpression = @This(); + +_rc: lp.RC(u8) = .{}, +_arena: Allocator, +_expr: *const xpath.Ast.Expr, + +pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression { + const arena = try frame.getArena(.tiny, "XPathExpression"); + errdefer frame.releaseArena(arena); + + const expr = try xpath.Parser.parse(arena, expression); + const xe = try arena.create(XPathExpression); + xe.* = .{ ._arena = arena, ._expr = expr }; + return xe; +} + +pub fn evaluate( + self: *XPathExpression, + context_node: *Node, + requested_type: u16, + result: ?*XPathResult, + frame: *Frame, +) !*XPathResult { + // The `result` reuse parameter (WHATWG: optional XPathResult to + // populate) is accepted-and-ignored: we always allocate fresh, + // which matches every modern browser's effective behavior. 
+ _ = result; + + const arena = try frame.getArena(.medium, "XPathResult"); + errdefer frame.releaseArena(arena); + + const eval_result = try xpath.Evaluator.evaluate(arena, frame, self._expr, context_node); + return XPathResult.fromResult(arena, requested_type, eval_result); +} + +pub fn deinit(self: *XPathExpression, page: *Page) void { + page.releaseArena(self._arena); +} + +pub fn acquireRef(self: *XPathExpression) void { + self._rc.acquire(); +} + +pub fn releaseRef(self: *XPathExpression, page: *Page) void { + self._rc.release(self, page); +} + +pub const JsApi = struct { + pub const bridge = js.Bridge(XPathExpression); + + pub const Meta = struct { + pub const name = "XPathExpression"; + pub const prototype_chain = bridge.prototypeChain(); + pub var class_id: bridge.ClassId = undefined; + }; + + pub const evaluate = bridge.function(XPathExpression.evaluate, .{ .dom_exception = true }); +}; diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig new file mode 100644 index 00000000..6bf5095b --- /dev/null +++ b/src/browser/webapi/XPathResult.zig @@ -0,0 +1,277 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//! WHATWG `XPathResult` (full surface, all 10 type constants — decision +//! #4). 
Wraps the evaluator's `Result.Result` for JS consumption: +//! coerces to the requested result type at construction, exposes the +//! type-tagged accessors, and serves the iterator/snapshot APIs. +//! +//! Lifetime model: each `XPathResult` owns a per-instance arena +//! (`getArena(.medium, ...)`) that holds both the struct and the result +//! data (node-set slice, formatted strings). The arena is released in +//! `deinit` once the JS wrapper's refcount hits zero. +//! +//! Type-mismatch accessor calls return `error.InvalidStateError` — +//! translated to a `DOMException` by `bridge.function(.., .{ +//! .dom_exception = true })`. The WHATWG IDL technically specifies +//! `TypeError` for type mismatches, but `InvalidStateError` is what +//! decision #4 captures and what most legacy XPath consumers expect. + +const std = @import("std"); +const lp = @import("lightpanda"); + +const js = @import("../js/js.zig"); +const Page = @import("../Page.zig"); +const Frame = @import("../Frame.zig"); + +const Node = @import("Node.zig"); + +// XPath runtime helpers. Aliased to keep the cross-directory imports +// readable when both modules expose a `Result` type. +const xpath = struct { + const Parser = @import("../xpath/Parser.zig"); + const Evaluator = @import("../xpath/Evaluator.zig"); + const Result = @import("../xpath/Result.zig"); +}; + +const Allocator = std.mem.Allocator; + +const XPathResult = @This(); + +// WHATWG type constants. ANY_TYPE is a request flag — at construction +// it resolves to one of the four concrete categories (NUMBER, STRING, +// BOOLEAN, UNORDERED_NODE_ITERATOR) depending on what the expression +// produced. 
+pub const ANY_TYPE: u16 = 0; +pub const NUMBER_TYPE: u16 = 1; +pub const STRING_TYPE: u16 = 2; +pub const BOOLEAN_TYPE: u16 = 3; +pub const UNORDERED_NODE_ITERATOR_TYPE: u16 = 4; +pub const ORDERED_NODE_ITERATOR_TYPE: u16 = 5; +pub const UNORDERED_NODE_SNAPSHOT_TYPE: u16 = 6; +pub const ORDERED_NODE_SNAPSHOT_TYPE: u16 = 7; +pub const ANY_UNORDERED_NODE_TYPE: u16 = 8; +pub const FIRST_ORDERED_NODE_TYPE: u16 = 9; + +const Value = union(enum) { + number: f64, + string: []const u8, + boolean: bool, + nodes: []const *Node, +}; + +_rc: lp.RC(u8) = .{}, +_arena: Allocator, +_type: u16, +_value: Value, +_iter_pos: usize = 0, + +// ----- constructors ----- + +/// One-shot: parse + evaluate + wrap. Used by `Document.evaluate` and +/// `XPathEvaluator.evaluate`. Allocates a per-instance arena for the +/// AST + result data + the struct itself. +pub fn fromExpression( + expression: []const u8, + context_node: *Node, + requested_type: u16, + frame: *Frame, +) !*XPathResult { + const arena = try frame.getArena(.medium, "XPathResult"); + errdefer frame.releaseArena(arena); + + const expr = try xpath.Parser.parse(arena, expression); + const result = try xpath.Evaluator.evaluate(arena, frame, expr, context_node); + return fromResult(arena, requested_type, result); +} + +/// Wrap an already-evaluated `Result.Result` into an XPathResult. The +/// caller hands over ownership of `arena` — the XPathResult will release +/// it on deinit. Used by `XPathExpression.evaluate` (which has its own +/// AST cache and only allocates a fresh result arena). 
+pub fn fromResult( + arena: Allocator, + requested_type: u16, + result: xpath.Result.Result, +) !*XPathResult { + const value: Value = switch (requested_type) { + ANY_TYPE => switch (result) { + .number => |n| .{ .number = n }, + .string => |s| .{ .string = s }, + .boolean => |b| .{ .boolean = b }, + .node_set => |ns| .{ .nodes = ns }, + }, + NUMBER_TYPE => .{ .number = try xpath.Result.toNumber(arena, result) }, + STRING_TYPE => .{ .string = try xpath.Result.toString(arena, result) }, + BOOLEAN_TYPE => .{ .boolean = xpath.Result.toBoolean(result) }, + UNORDERED_NODE_ITERATOR_TYPE, + ORDERED_NODE_ITERATOR_TYPE, + UNORDERED_NODE_SNAPSHOT_TYPE, + ORDERED_NODE_SNAPSHOT_TYPE, + ANY_UNORDERED_NODE_TYPE, + FIRST_ORDERED_NODE_TYPE, + => switch (result) { + .node_set => |ns| .{ .nodes = ns }, + // Requesting a node-set type for a non-node-set expression. + // WHATWG specifies TypeError, but DOMException.fromError has + // no TypeError mapping (would surface as a plain JS Error); + // unify on InvalidStateError per the project plan. 
+ else => return error.InvalidStateError, + }, + else => return error.InvalidStateError, + }; + + const final_type: u16 = if (requested_type == ANY_TYPE) switch (value) { + .number => NUMBER_TYPE, + .string => STRING_TYPE, + .boolean => BOOLEAN_TYPE, + .nodes => UNORDERED_NODE_ITERATOR_TYPE, + } else requested_type; + + const xr = try arena.create(XPathResult); + xr.* = .{ + ._arena = arena, + ._type = final_type, + ._value = value, + }; + return xr; +} + +// ----- lifecycle ----- + +pub fn deinit(self: *XPathResult, page: *Page) void { + page.releaseArena(self._arena); +} + +pub fn acquireRef(self: *XPathResult) void { + self._rc.acquire(); +} + +pub fn releaseRef(self: *XPathResult, page: *Page) void { + self._rc.release(self, page); +} + +// ----- accessors ----- + +fn getResultType(self: *const XPathResult) u16 { + return self._type; +} + +fn getNumberValue(self: *const XPathResult) !f64 { + if (self._type != NUMBER_TYPE) return error.InvalidStateError; + return self._value.number; +} + +fn getStringValue(self: *const XPathResult) ![]const u8 { + if (self._type != STRING_TYPE) return error.InvalidStateError; + return self._value.string; +} + +fn getBooleanValue(self: *const XPathResult) !bool { + if (self._type != BOOLEAN_TYPE) return error.InvalidStateError; + return self._value.boolean; +} + +fn getSingleNodeValue(self: *const XPathResult) !?*Node { + if (self._type != ANY_UNORDERED_NODE_TYPE and self._type != FIRST_ORDERED_NODE_TYPE) { + return error.InvalidStateError; + } + return if (self._value.nodes.len == 0) null else self._value.nodes[0]; +} + +fn getSnapshotLength(self: *const XPathResult) !u32 { + if (self._type != UNORDERED_NODE_SNAPSHOT_TYPE and self._type != ORDERED_NODE_SNAPSHOT_TYPE) { + return error.InvalidStateError; + } + return @intCast(self._value.nodes.len); +} + +/// Live mutation tracking on the iterator isn't implemented — we hold a +/// frozen pointer slice, so the iterator is never "invalidated" by DOM +/// edits during traversal. 
Always returns false; matches the polyfill, +/// which is snapshot-only. +fn getInvalidIteratorState(_: *const XPathResult) bool { + return false; +} + +// ----- methods ----- + +pub fn iterateNext(self: *XPathResult) !?*Node { + if (self._type != UNORDERED_NODE_ITERATOR_TYPE and self._type != ORDERED_NODE_ITERATOR_TYPE) { + return error.InvalidStateError; + } + if (self._iter_pos >= self._value.nodes.len) return null; + const node = self._value.nodes[self._iter_pos]; + self._iter_pos += 1; + return node; +} + +pub fn snapshotItem(self: *const XPathResult, index: u32) !?*Node { + if (self._type != UNORDERED_NODE_SNAPSHOT_TYPE and self._type != ORDERED_NODE_SNAPSHOT_TYPE) { + return error.InvalidStateError; + } + if (index >= self._value.nodes.len) return null; + return self._value.nodes[index]; +} + +// ----- JS bridge ----- + +pub const JsApi = struct { + pub const bridge = js.Bridge(XPathResult); + + pub const Meta = struct { + pub const name = "XPathResult"; + pub const prototype_chain = bridge.prototypeChain(); + pub var class_id: bridge.ClassId = undefined; + }; + + // Type constants — both static (on the constructor) and instance + // properties per the WHATWG IDL. `template = true` makes them + // class-level so `XPathResult.ORDERED_NODE_SNAPSHOT_TYPE` works. 
+ pub const ANY_TYPE = bridge.property(XPathResult.ANY_TYPE, .{ .template = true }); + pub const NUMBER_TYPE = bridge.property(XPathResult.NUMBER_TYPE, .{ .template = true }); + pub const STRING_TYPE = bridge.property(XPathResult.STRING_TYPE, .{ .template = true }); + pub const BOOLEAN_TYPE = bridge.property(XPathResult.BOOLEAN_TYPE, .{ .template = true }); + pub const UNORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, .{ .template = true }); + pub const ORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.ORDERED_NODE_ITERATOR_TYPE, .{ .template = true }); + pub const UNORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true }); + pub const ORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true }); + pub const ANY_UNORDERED_NODE_TYPE = bridge.property(XPathResult.ANY_UNORDERED_NODE_TYPE, .{ .template = true }); + pub const FIRST_ORDERED_NODE_TYPE = bridge.property(XPathResult.FIRST_ORDERED_NODE_TYPE, .{ .template = true }); + + pub const resultType = bridge.accessor(XPathResult.getResultType, null, .{}); + pub const numberValue = bridge.accessor(XPathResult.getNumberValue, null, .{ .dom_exception = true }); + pub const stringValue = bridge.accessor(XPathResult.getStringValue, null, .{ .dom_exception = true }); + pub const booleanValue = bridge.accessor(XPathResult.getBooleanValue, null, .{ .dom_exception = true }); + pub const singleNodeValue = bridge.accessor(XPathResult.getSingleNodeValue, null, .{ .dom_exception = true }); + pub const snapshotLength = bridge.accessor(XPathResult.getSnapshotLength, null, .{ .dom_exception = true }); + pub const invalidIteratorState = bridge.accessor(XPathResult.getInvalidIteratorState, null, .{}); + + pub const iterateNext = bridge.function(XPathResult.iterateNext, .{ .dom_exception = true }); + pub const snapshotItem = bridge.function(XPathResult.snapshotItem, .{ .dom_exception = true 
}); +}; + +const testing = @import("../../testing.zig"); + +test "WebApi: XPathResult" { + try testing.htmlRunner("xpath/xpath_result.html", .{}); +} + +test "WebApi: XPath conformance" { + try testing.htmlRunner("xpath/xpath_conformance.html", .{}); +} diff --git a/src/browser/xpath/Ast.zig b/src/browser/xpath/Ast.zig new file mode 100644 index 00000000..00125e33 --- /dev/null +++ b/src/browser/xpath/Ast.zig @@ -0,0 +1,134 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//! XPath 1.0 AST. +//! +//! Mirrors the polyfill AST in capybara-lightpanda +//! (lib/capybara/lightpanda/javascripts/index.js, the `op:`-tagged +//! object literals built by Parser.prototype.parse*). Slices and +//! pointers are arena-owned by the Parser; the AST has no destructor. + +pub const Expr = union(enum) { + /// Absolute or relative location path: `/foo/bar`, `//x`, `foo/bar`. + path: Path, + /// Filter expression followed by a location-path tail: + /// `(//a)/b`, `(expr)//c`. + filter_path: FilterPath, + /// Filter expression with a single predicate: `(expr)[n]`. + /// Multi-predicate filters nest: `(e)[1][2]` → filter(filter(e,1),2). + filter: Filter, + binop: BinOp, + /// Unary minus. The polyfill has no unary `+`. + neg: *Expr, + /// String literal, quotes stripped. 
+ literal: []const u8, + /// Numeric literal, parsed to f64. + number: f64, + /// Variable reference. The leading `$` is stripped; per decision #3 + /// the evaluator always returns the empty string. + var_ref: []const u8, + fn_call: FnCall, +}; + +pub const Path = struct { + absolute: bool, + steps: []const Step, +}; + +pub const FilterPath = struct { + filter: *Expr, + steps: []const Step, +}; + +pub const Filter = struct { + expr: *Expr, + predicate: *Expr, +}; + +pub const BinOp = struct { + op: BinOpKind, + left: *Expr, + right: *Expr, +}; + +pub const BinOpKind = enum { + or_, + and_, + eq, + neq, + lt, + gt, + lte, + gte, + add, + sub, + mul, + div, + mod, + union_, +}; + +pub const FnCall = struct { + name: []const u8, + args: []const *Expr, +}; + +pub const Step = struct { + axis: Axis, + node_test: NodeTest, + predicates: []const *Expr, +}; + +pub const Axis = enum { + child, + descendant, + descendant_or_self, + self, + parent, + ancestor, + ancestor_or_self, + following_sibling, + preceding_sibling, + following, + preceding, + attribute, + namespace, + /// Polyfill parity (decision #2): unknown axis names parse to + /// this variant; the evaluator returns an empty node-set. + unknown, +}; + +pub const NodeTest = union(enum) { + /// Element / attribute name. Special values: + /// - "*" → wildcard + /// - "prefix:*" → namespace wildcard + /// - "prefix:local" → namespace-prefixed name + /// The evaluator splits these. + name: []const u8, + /// `node()`, `text()`, `comment()`, `processing-instruction()`. + /// The optional target literal of `processing-instruction("foo")` + /// is consumed but not stored (decision #3 stub). 
+ type_test: TypeTest, +}; + +pub const TypeTest = enum { + node, + text, + comment, + processing_instruction, +}; diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig new file mode 100644 index 00000000..a16d7b37 --- /dev/null +++ b/src/browser/xpath/Evaluator.zig @@ -0,0 +1,725 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <https://www.gnu.org/licenses/>. + +//! XPath 1.0 evaluator — runs an `Ast.Expr` against a context node and +//! produces a `Result`. Mirrors the polyfill's `evaluate()` and +//! `evalStep()` (lib/capybara/lightpanda/javascripts/index.js, lines +//! 344–644). The evaluator allocates intermediate values (node-set +//! slices, formatted numbers, materialized attribute nodes) into the +//! caller's arena. The context `Frame` is needed for `getElementById` +//! and to materialize attributes (the attribute axis returns full +//! `Attribute` nodes so the result is `*Node`-uniform). +//! +//! Document-order sort happens once at the public boundary +//! (`evaluate()`); intermediate step results stay in axis order so +//! reverse-axis positional predicates evaluate against proximity. 
+ +const std = @import("std"); +const Allocator = std.mem.Allocator; +const lp = @import("lightpanda"); + +const Ast = @import("Ast.zig"); +const Parser = @import("Parser.zig"); +const Result = @import("Result.zig"); +const Functions = @import("Functions.zig"); +const Node = @import("../webapi/Node.zig"); +const Element = Node.Element; +const Document = Node.Document; +const Frame = lp.Frame; + +const Evaluator = @This(); + +pub const Error = error{ + OutOfMemory, + WriteFailed, + // Surfaces from Attribute materialization (`Entry.toAttribute` → + // `String.dupe` enforces a length limit). The polyfill never hits + // this since JS strings are unbounded, but Lightpanda's `String` + // type caps at u32::MAX bytes — propagate so callers can surface + // a DOM exception. + StringTooLarge, + UnknownFunction, + UnionRequiresNodeSets, +}; + +arena: Allocator, +frame: *Frame, + +/// Public entry. Returns the AST's value; node-sets are sorted into +/// document order before return per XPath spec §3.3. +pub fn evaluate(arena: Allocator, frame: *Frame, expr: *const Ast.Expr, context_node: *Node) Error!Result.Result { + var ev = Evaluator{ .arena = arena, .frame = frame }; + const result = try ev.evalExpr(expr, context_node, 1, 1); + if (result == .node_set) { + sortDocOrder(@constCast(result.node_set)); + } + return result; +} + +pub const SearchError = Error || Parser.Error; + +/// Convenience for `DOM.performSearch` and capybara `xpathFind`: parse + +/// evaluate and unwrap the node-set. Top-level scalar expressions yield +/// an empty slice (decision #3 — these APIs are for finding nodes, not +/// arbitrary computation). 
+pub fn searchAll(arena: Allocator, frame: *Frame, root: *Node, expression: []const u8) SearchError![]const *Node { + const expr = try Parser.parse(arena, expression); + return switch (try evaluate(arena, frame, expr, root)) { + .node_set => |ns| ns, + else => &.{}, + }; +} + +// ----- AST evaluation ----- + +fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + return switch (expr.*) { + .number => |n| .{ .number = n }, + .literal => |s| .{ .string = s }, + .var_ref => .{ .string = "" }, // decision #3 stub + .neg => |inner| blk: { + const v = try self.evalExpr(inner, ctx, pos, size); + const n = try Result.toNumber(self.arena, v); + break :blk .{ .number = -n }; + }, + .binop => |bo| try self.evalBinop(bo, ctx, pos, size), + .path => |p| try self.evalPath(p, ctx), + .filter_path => |fp| try self.evalFilterPath(fp, ctx, pos, size), + .filter => |f| try self.evalFilter(f, ctx, pos, size), + .fn_call => |fc| try self.evalFnCall(fc, ctx, pos, size), + }; +} + +fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { + const start: *Node = if (path.absolute) blk: { + if (ctx._type == .document) break :blk ctx; + const owner = ctx.ownerDocument(self.frame) orelse break :blk ctx; + break :blk owner.asNode(); + } else ctx; + + var current = try self.arena.alloc(*Node, 1); + current[0] = start; + var current_set: []const *Node = current; + + for (path.steps) |step| { + const r = try self.evalStep(current_set, step); + current_set = r.node_set; + } + return .{ .node_set = current_set }; +} + +fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + const base = try self.evalExpr(fp.filter, ctx, pos, size); + if (base != .node_set) return base; + + var current: []const *Node = base.node_set; + for (fp.steps) |step| { + const r = try self.evalStep(current, step); + current = r.node_set; + } + return .{ .node_set = current }; +} + +fn 
evalFilter(self: *Evaluator, f: Ast.Filter, ctx: *Node, pos: usize, size: usize) Error!Result.Result {
+    // Evaluates `PrimaryExpr[predicate]`. A non-node-set base is returned
+    // unchanged (the predicate is not applied to scalars).
+    const base = try self.evalExpr(f.expr, ctx, pos, size);
+    if (base != .node_set) return base;
+
+    // XPath 1.0 §3.3: a predicate on a filter expression is evaluated with
+    // respect to the child axis, i.e. positions are counted in document
+    // order. Step results are only de-duplicated, not ordered (evalStep
+    // concatenates per-context results), so sort a private copy first;
+    // otherwise `(//a)[1]` can select a node that is not the first `a`.
+    const ordered = try self.arena.dupe(*Node, base.node_set);
+    sortDocOrder(ordered);
+
+    var out: std.ArrayList(*Node) = .empty;
+    const sz = ordered.len;
+    for (ordered, 0..) |n, idx| {
+        const k = idx + 1; // XPath positions are 1-based
+        const val = try self.evalExpr(f.predicate, n, k, sz);
+        if (predicateMatches(val, k)) try out.append(self.arena, n);
+    }
+    return .{ .node_set = out.items };
+}
+
+// ----- step + axis -----
+
+fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: Ast.Step) Error!Result.Result {
+    var dedup: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
+
+    // Pre-lowercase the name test once per step. matchNameTest does
+    // case-insensitive matching (decision #2); without this hoist, every
+    // axis node would pay the per-byte case-fold inside `eqlIgnoreCase`.
+    const lowered_name: ?[]const u8 = switch (step.node_test) {
+        .name => |n| if (std.mem.eql(u8, n, "*")) null else try std.ascii.allocLowerString(self.arena, n),
+        .type_test => null,
+    };
+
+    for (ctx_nodes) |ctx| {
+        const axis_nodes = try self.axisNodes(ctx, step.axis);
+
+        var filtered: std.ArrayList(*Node) = .empty;
+        for (axis_nodes) |n| {
+            if (matchTest(n, step.node_test, step.axis, lowered_name)) {
+                try filtered.append(self.arena, n);
+            }
+        }
+
+        var current: []const *Node = filtered.items;
+        for (step.predicates) |pred| {
+            var next: std.ArrayList(*Node) = .empty;
+            const sz = current.len;
+            for (current, 0..)
|n, idx| { + const k = idx + 1; + const val = try self.evalExpr(pred, n, k, sz); + if (predicateMatches(val, k)) try next.append(self.arena, n); + } + current = next.items; + } + + for (current) |n| try dedup.put(self.arena, n, {}); + } + + return .{ .node_set = dedup.keys() }; +} + +fn axisNodes(self: *Evaluator, node: *Node, axis: Ast.Axis) Error![]const *Node { + var out: std.ArrayList(*Node) = .empty; + switch (axis) { + .child => { + var it = node.childrenIterator(); + while (it.next()) |c| try out.append(self.arena, c); + }, + .descendant => try self.appendDescendants(node, &out), + .descendant_or_self => { + try out.append(self.arena, node); + try self.appendDescendants(node, &out); + }, + .self => try out.append(self.arena, node), + .parent => { + if (node.parentNode()) |p| try out.append(self.arena, p); + }, + // Reverse axes — proximity order (nearest first). Final node-set + // is sorted to document order at the public boundary. + .ancestor => { + var p = node.parentNode(); + while (p) |n| : (p = n.parentNode()) try out.append(self.arena, n); + }, + .ancestor_or_self => { + try out.append(self.arena, node); + var p = node.parentNode(); + while (p) |n| : (p = n.parentNode()) try out.append(self.arena, n); + }, + .following_sibling => { + var s = node.nextSibling(); + while (s) |n| : (s = n.nextSibling()) try out.append(self.arena, n); + }, + .preceding_sibling => { + var s = node.previousSibling(); + while (s) |n| : (s = n.previousSibling()) try out.append(self.arena, n); + }, + .following => try self.appendFollowing(node, &out), + .preceding => try self.appendPreceding(node, &out), + .attribute => try self.appendAttributes(node, &out), + .namespace, .unknown => {}, // decision #3 stubs + } + return out.items; +} + +fn appendDescendants(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void { + var it = node.childrenIterator(); + while (it.next()) |c| { + try out.append(self.arena, c); + try self.appendDescendants(c, out); + } +} + +fn 
appendFollowing(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void { + var n: ?*Node = start; + while (n) |cur| : (n = cur.parentNode()) { + var s = cur.nextSibling(); + while (s) |sn| : (s = sn.nextSibling()) { + try out.append(self.arena, sn); + try self.appendDescendants(sn, out); + } + } +} + +fn appendPrecedingSubtree(self: *Evaluator, n: *Node, out: *std.ArrayList(*Node)) Error!void { + // Reverse document order: deepest-last children first, then self. + var c = n.lastChild(); + while (c) |child| : (c = child.previousSibling()) { + try self.appendPrecedingSubtree(child, out); + } + try out.append(self.arena, n); +} + +fn appendPreceding(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void { + var n: ?*Node = start; + while (n) |cur| { + const parent = cur.parentNode() orelse break; + var s = cur.previousSibling(); + while (s) |sn| : (s = sn.previousSibling()) { + try self.appendPrecedingSubtree(sn, out); + } + n = parent; + } +} + +fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void { + const el = node.is(Element) orelse return; + var it = el.attributeIterator(); + while (it.next()) |entry| { + // Materialize as full Attribute so the result is *Node-uniform. + // Allocates from frame.arena (long-lived); attribute axis is + // typically leaf, so churn is bounded. 
+ const attr = try entry.toAttribute(el, self.frame); + try out.append(self.arena, attr._proto); + } +} + +// ----- node test matching ----- + +fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[]const u8) bool { + return switch (test_) { + .type_test => |kind| switch (kind) { + .node => true, + .text => node.getNodeType() == 3, + .comment => node.getNodeType() == 8, + .processing_instruction => node.getNodeType() == 7, + }, + .name => |name| matchNameTest(node, name, axis, lowered_name), + }; +} + +fn matchNameTest(node: *Node, name: []const u8, axis: Ast.Axis, lowered_name: ?[]const u8) bool { + // `lowered_name` is non-null iff `name != "*"`. Element tag names + // (`getTagNameLower`) and html5ever-stored attribute names are already + // lowercase, so a plain `mem.eql` against the pre-lowered test name + // replaces the per-call `eqlIgnoreCase`. + if (axis == .attribute) { + if (std.mem.eql(u8, name, "*")) return node._type == .attribute; + const attr = switch (node._type) { + .attribute => |a| a, + else => return false, + }; + return std.mem.eql(u8, attr._name.str(), lowered_name.?); + } + const el = node.is(Element) orelse return false; + if (std.mem.eql(u8, name, "*")) return true; + return std.mem.eql(u8, el.getTagNameLower(), lowered_name.?); +} + +// ----- binop ----- + +fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!Result.Result { + switch (bo.op) { + .or_ => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + if (Result.toBoolean(l)) return .{ .boolean = true }; + const r = try self.evalExpr(bo.right, ctx, pos, size); + return .{ .boolean = Result.toBoolean(r) }; + }, + .and_ => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + if (!Result.toBoolean(l)) return .{ .boolean = false }; + const r = try self.evalExpr(bo.right, ctx, pos, size); + return .{ .boolean = Result.toBoolean(r) }; + }, + .eq, .neq, .lt, .gt, .lte, .gte => { + const l = try self.evalExpr(bo.left, 
ctx, pos, size); + const r = try self.evalExpr(bo.right, ctx, pos, size); + return .{ .boolean = try self.xCmp(l, r, bo.op) }; + }, + .add, .sub, .mul, .div, .mod => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + const r = try self.evalExpr(bo.right, ctx, pos, size); + const ln = try Result.toNumber(self.arena, l); + const rn = try Result.toNumber(self.arena, r); + const v: f64 = switch (bo.op) { + .add => ln + rn, + .sub => ln - rn, + .mul => ln * rn, + .div => ln / rn, + // JS `%` and Zig `@rem` agree on sign for finite values + // and propagate NaN (XPath §3.5). + .mod => @rem(ln, rn), + else => unreachable, + }; + return .{ .number = v }; + }, + .union_ => { + const l = try self.evalExpr(bo.left, ctx, pos, size); + const r = try self.evalExpr(bo.right, ctx, pos, size); + if (l != .node_set or r != .node_set) return error.UnionRequiresNodeSets; + var seen: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; + for (l.node_set) |n| try seen.put(self.arena, n, {}); + for (r.node_set) |n| try seen.put(self.arena, n, {}); + const nodes = seen.keys(); + sortDocOrder(@constCast(nodes)); + return .{ .node_set = nodes }; + }, + } +} + +// ----- comparison (XPath spec §3.4) ----- + +fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.BinOpKind) Error!bool { + const is_eq = (op == .eq or op == .neq); + const l_is_set = (left == .node_set); + const r_is_set = (right == .node_set); + + if (l_is_set and r_is_set) { + // Cache right-side string-values once. Without this, each left node + // would pay |right| allocations — O(N×M) for a set×set comparison + // (e.g. `//foo = //bar` on a large page). + const right_strings = try self.arena.alloc([]const u8, right.node_set.len); + for (right.node_set, 0..) 
|r, i| { + right_strings[i] = try Result.stringValueOf(self.arena, r); + } + for (left.node_set) |l| { + const lv = try Result.stringValueOf(self.arena, l); + for (right_strings) |rv| { + const matched = if (is_eq) + cmpString(lv, rv, op) + else + cmpNumber(Result.stringToNumber(lv), Result.stringToNumber(rv), op); + if (matched) return true; + } + } + return false; + } + + if (l_is_set or r_is_set) { + const ns = if (l_is_set) left.node_set else right.node_set; + const other = if (l_is_set) right else left; + const ns_left = l_is_set; + + if (other == .boolean) { + const ns_b = ns.len > 0; + const a, const b = if (ns_left) .{ ns_b, other.boolean } else .{ other.boolean, ns_b }; + return cmpBool(a, b, op); + } + + for (ns) |n| { + const sv = try Result.stringValueOf(self.arena, n); + const matched = switch (other) { + .number => |num| blk: { + const sv_num = Result.stringToNumber(sv); + const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num }; + break :blk cmpNumber(a, b, op); + }, + .string => |s| blk: { + if (is_eq) { + const a, const b = if (ns_left) .{ sv, s } else .{ s, sv }; + break :blk cmpString(a, b, op); + } + const sv_num = Result.stringToNumber(sv); + const s_num = Result.stringToNumber(s); + const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num }; + break :blk cmpNumber(a, b, op); + }, + .boolean, .node_set => unreachable, // handled above + }; + if (matched) return true; + } + return false; + } + + // Neither is a node-set. 
+    if (is_eq) {
+        if (left == .boolean or right == .boolean) {
+            return cmpBool(Result.toBoolean(left), Result.toBoolean(right), op);
+        }
+        if (left == .number or right == .number) {
+            const ln = try Result.toNumber(self.arena, left);
+            const rn = try Result.toNumber(self.arena, right);
+            return cmpNumber(ln, rn, op);
+        }
+        const ls = try Result.toString(self.arena, left);
+        const rs = try Result.toString(self.arena, right);
+        return cmpString(ls, rs, op);
+    }
+    // Non-eq with no node-set: both → number.
+    const ln = try Result.toNumber(self.arena, left);
+    const rn = try Result.toNumber(self.arena, right);
+    return cmpNumber(ln, rn, op);
+}
+
+fn cmpString(a: []const u8, b: []const u8, op: Ast.BinOpKind) bool {
+    const equal = std.mem.eql(u8, a, b);
+    return switch (op) {
+        .eq => equal,
+        .neq => !equal,
+        else => unreachable, // <, > etc. always coerce to number first
+    };
+}
+
+fn cmpNumber(a: f64, b: f64, op: Ast.BinOpKind) bool {
+    // Native f64 comparison gives correct NaN semantics:
+    // NaN == X is false, NaN != X is true, NaN < X (etc.) is false.
+    return switch (op) {
+        .eq => a == b,
+        .neq => a != b,
+        .lt => a < b,
+        .gt => a > b,
+        .lte => a <= b,
+        .gte => a >= b,
+        else => unreachable,
+    };
+}
+
+/// Compares two booleans under any of the six comparison operators.
+///
+/// `xCmp` routes every node-set-vs-boolean comparison here, including the
+/// relational operators (e.g. `//a < true()`), so those must be handled
+/// rather than declared unreachable. XPath 1.0 §3.4 compares relational
+/// operands as numbers, with false → 0 and true → 1.
+fn cmpBool(a: bool, b: bool, op: Ast.BinOpKind) bool {
+    return switch (op) {
+        .eq => a == b,
+        .neq => a != b,
+        .lt, .gt, .lte, .gte => cmpNumber(
+            @floatFromInt(@intFromBool(a)),
+            @floatFromInt(@intFromBool(b)),
+            op,
+        ),
+        // Logical, arithmetic and union operators never reach the
+        // comparison helpers (evalBinop only calls xCmp for comparisons).
+        else => unreachable,
+    };
+}
+
+// ----- function calls -----
+
+fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!Result.Result {
+    // position()/last() stay here — they need the (pos, size) closure
+    // that Functions.call doesn't see. Keeping them inline avoids
+    // pushing per-call context through Functions' signature.
+    if (std.mem.eql(u8, fc.name, "position")) return .{ .number = @floatFromInt(pos) };
+    if (std.mem.eql(u8, fc.name, "last")) return .{ .number = @floatFromInt(size) };
+
+    // Eagerly evaluate args.
Matches the polyfill's `evaluate(args[i], ...)` + // pattern; lazy short-circuit isn't needed because `or`/`and` are + // binops handled in evalBinop, not function calls. + const eval_args = try self.arena.alloc(Result.Result, fc.args.len); + for (fc.args, 0..) |a, i| eval_args[i] = try self.evalExpr(a, ctx, pos, size); + + return Functions.call(self.arena, self.frame, fc.name, eval_args, ctx); +} + +// ----- helpers ----- + +fn predicateMatches(val: Result.Result, position: usize) bool { + return switch (val) { + // Numeric predicate value selects only the node at that position + // (1-based). Non-integer numbers never match. + .number => |n| n == @as(f64, @floatFromInt(position)), + else => Result.toBoolean(val), + }; +} + +pub fn sortDocOrder(nodes: []*Node) void { + if (nodes.len <= 1) return; + std.mem.sort(*Node, nodes, {}, lessThanDocOrder); +} + +fn lessThanDocOrder(_: void, a: *Node, b: *Node) bool { + if (a == b) return false; + const pos = a.compareDocumentPosition(b); + // FOLLOWING (0x04) — b comes after a in document order. + return (pos & 0x04) != 0; +} + +// --------------------------------------------------------------------- +// Tests — pure-logic only. DOM-dependent evaluation lands as HTML +// fixtures in Phase 9 (tests/xpath/*.html); Lightpanda has no in-Zig +// way to construct a Frame + Document tree without the JS runtime. 
+// --------------------------------------------------------------------- + +const testing = std.testing; +const Tokenizer = @import("Tokenizer.zig"); + +test "Evaluator: cmpNumber NaN semantics" { + const nan = std.math.nan(f64); + try testing.expect(!cmpNumber(nan, nan, .eq)); + try testing.expect(cmpNumber(nan, nan, .neq)); + try testing.expect(!cmpNumber(nan, 0, .lt)); + try testing.expect(!cmpNumber(nan, 0, .gt)); + try testing.expect(!cmpNumber(nan, 0, .lte)); + try testing.expect(!cmpNumber(nan, 0, .gte)); + try testing.expect(cmpNumber(0, 0, .eq)); + try testing.expect(cmpNumber(1, 2, .lt)); + try testing.expect(cmpNumber(2, 1, .gt)); + try testing.expect(cmpNumber(1, 1, .lte)); + try testing.expect(cmpNumber(1, 1, .gte)); +} + +test "Evaluator: cmpString" { + try testing.expect(cmpString("a", "a", .eq)); + try testing.expect(!cmpString("a", "b", .eq)); + try testing.expect(cmpString("a", "b", .neq)); + try testing.expect(!cmpString("a", "a", .neq)); +} + +test "Evaluator: cmpBool" { + try testing.expect(cmpBool(true, true, .eq)); + try testing.expect(!cmpBool(true, false, .eq)); + try testing.expect(cmpBool(true, false, .neq)); +} + +test "Evaluator: predicateMatches numeric vs boolean" { + try testing.expect(predicateMatches(.{ .number = 1 }, 1)); + try testing.expect(!predicateMatches(.{ .number = 2 }, 1)); + // Non-integer never matches. + try testing.expect(!predicateMatches(.{ .number = 1.5 }, 1)); + // Boolean: any truthy value passes regardless of position. + try testing.expect(predicateMatches(.{ .boolean = true }, 7)); + try testing.expect(!predicateMatches(.{ .boolean = false }, 1)); + // String: nonempty truthy. + try testing.expect(predicateMatches(.{ .string = "x" }, 99)); + try testing.expect(!predicateMatches(.{ .string = "" }, 1)); + // Empty node-set: falsy. 
+ try testing.expect(!predicateMatches(.{ .node_set = &.{} }, 1)); +} + +test "Evaluator: scalar arithmetic via parsed expressions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "1 + 2", 3 }, + .{ "5 - 3", 2 }, + .{ "4 * 2", 8 }, + .{ "10 div 4", 2.5 }, + .{ "10 mod 3", 1 }, + .{ "-5", -5 }, + .{ "1 + 2 * 3", 7 }, + }) |case| { + const expr = try Parser.parse(a, case[0]); + // Frame is unused for pure-arithmetic AST. The unsafe cast lets + // us exercise binop / number paths without a real DOM. Any path + // accessing the Frame would crash; the inputs above never do. + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const ctx_dummy: *Node = @ptrFromInt(0x2000); + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r == .number); + try testing.expectEqual(@as(f64, case[1]), r.number); + } +} + +test "Evaluator: scalar comparison via parsed expressions" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + inline for (.{ + .{ "1 = 1", true }, + .{ "1 = 2", false }, + .{ "1 != 2", true }, + .{ "1 < 2", true }, + .{ "2 < 1", false }, + .{ "1 <= 1", true }, + .{ "2 >= 2", true }, + .{ "'abc' = 'abc'", true }, + .{ "'abc' != 'abd'", true }, + }) |case| { + const expr = try Parser.parse(a, case[0]); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const ctx_dummy: *Node = @ptrFromInt(0x2000); + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r == .boolean); + try testing.expectEqual(case[1], r.boolean); + } +} + +test "Evaluator: position() and last() reflect context" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + { + const expr = try Parser.parse(a, "position()"); + var ev = Evaluator{ .arena = a, .frame = 
@ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 3, 5); + try testing.expectEqual(@as(f64, 3), r.number); + } + { + const expr = try Parser.parse(a, "last()"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 3, 5); + try testing.expectEqual(@as(f64, 5), r.number); + } + { + // Logical short-circuit: last() never evaluates if first + // operand is true. + const expr = try Parser.parse(a, "1 = 1 or last() > 0"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r.boolean); + } +} + +test "Evaluator: short-circuit and/or" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + inline for (.{ + .{ "1 = 2 or 1 = 1", true }, + .{ "1 = 1 and 1 = 2", false }, + .{ "1 = 1 and 2 = 2", true }, + .{ "1 = 2 and 1 = 1", false }, + .{ "1 = 2 or 2 = 1", false }, + }) |case| { + const expr = try Parser.parse(a, case[0]); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(r == .boolean); + try testing.expectEqual(case[1], r.boolean); + } +} + +test "Evaluator: unary minus" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + const expr = try Parser.parse(a, "-(3 + 2)"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expectEqual(@as(f64, -5), r.number); +} + +test "Evaluator: division by zero produces infinity / NaN per IEEE" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + const ctx_dummy: *Node = @ptrFromInt(0x2000); + + { + const expr = try 
Parser.parse(a, "1 div 0"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(std.math.isPositiveInf(r.number)); + } + { + const expr = try Parser.parse(a, "0 div 0"); + var ev = Evaluator{ .arena = a, .frame = @ptrFromInt(0x1000) }; + const r = try ev.evalExpr(expr, ctx_dummy, 1, 1); + try testing.expect(std.math.isNan(r.number)); + } +} + +test "Evaluator: searchAll on scalar expression returns empty (decision #3)" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + + // Synthetic frame/root pointers are safe here because pure-scalar + // expressions (binop, literal, true(), comparison) never reach into + // the Frame or the context node. Adding a DOM-touching expression + // (e.g. `id('x')`) to this list would crash on dereference. + inline for (.{ "1 + 2", "'hello'", "true()", "1 = 1" }) |expr| { + const nodes = try searchAll(a, @ptrFromInt(0x1000), @ptrFromInt(0x2000), expr); + try testing.expectEqual(@as(usize, 0), nodes.len); + } +} diff --git a/src/browser/xpath/Functions.zig b/src/browser/xpath/Functions.zig new file mode 100644 index 00000000..d0ae7eac --- /dev/null +++ b/src/browser/xpath/Functions.zig @@ -0,0 +1,630 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. 
+// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 core function library — 25 functions per polyfill parity +//! (lib/capybara/lightpanda/javascripts/index.js, `evalFunc` at lines +//! 646–770). `position()` and `last()` live in `Evaluator.evalFnCall` +//! because they need the `(pos, size)` closure that this module never +//! sees. +//! +//! Args are pre-evaluated by the caller (`Evaluator.evalFnCall`). Eager +//! evaluation matches the polyfill's `evaluate(args[i], ctx, pos, size)` +//! pattern — short-circuit operators (`or`/`and`) are binops, not +//! function calls, so laziness isn't required here. The pre-evaluation +//! contract also keeps Functions.zig free of a circular import on +//! Evaluator.zig. +//! +//! Stubs per decision #3 (XPATH_COMPLIANCE.md): +//! - `lang(string)` → always false +//! - `namespace-uri(...)` → always "" +//! - `name`/`local-name` → lowercased (HTML pragmatism) +//! +//! Allocations land in the caller's per-evaluation arena. + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const lp = @import("lightpanda"); + +const Result = @import("Result.zig"); +const Node = @import("../webapi/Node.zig"); +const Element = Node.Element; +const Document = Node.Document; +const Frame = lp.Frame; + +pub const Error = error{ + OutOfMemory, + WriteFailed, + StringTooLarge, + UnknownFunction, +}; + +/// Dispatch a core-library function call. Returns `error.UnknownFunction` +/// if `name` doesn't match — the caller (Evaluator) handles +/// `position()` / `last()` inline before getting here, so this is the +/// last lookup stop. 
+pub fn call( + arena: Allocator, + frame: *Frame, + name: []const u8, + args: []const Result.Result, + ctx: *Node, +) Error!Result.Result { + // -- Node-set -- + if (eql(name, "count")) return .{ .number = countFn(args) }; + if (eql(name, "id")) return idFn(arena, frame, args, ctx); + if (eql(name, "local-name")) return .{ .string = try localNameFn(arena, args, ctx) }; + if (eql(name, "name")) return .{ .string = try nameFn(arena, args, ctx) }; + if (eql(name, "namespace-uri")) return .{ .string = "" }; + + // -- String -- + if (eql(name, "string")) return .{ .string = try stringFn(arena, args, ctx) }; + if (eql(name, "concat")) return .{ .string = try concatFn(arena, args) }; + if (eql(name, "starts-with")) return .{ .boolean = try startsWithFn(arena, args) }; + if (eql(name, "contains")) return .{ .boolean = try containsFn(arena, args) }; + if (eql(name, "substring-before")) return .{ .string = try substringBeforeFn(arena, args) }; + if (eql(name, "substring-after")) return .{ .string = try substringAfterFn(arena, args) }; + if (eql(name, "substring")) return .{ .string = try substringFn(arena, args) }; + if (eql(name, "string-length")) return .{ .number = try stringLengthFn(arena, args, ctx) }; + if (eql(name, "normalize-space")) return .{ .string = try normalizeSpaceFn(arena, args, ctx) }; + if (eql(name, "translate")) return .{ .string = try translateFn(arena, args) }; + + // -- Boolean -- + if (eql(name, "boolean")) return .{ .boolean = if (args.len == 0) false else Result.toBoolean(args[0]) }; + if (eql(name, "not")) return .{ .boolean = if (args.len == 0) true else !Result.toBoolean(args[0]) }; + if (eql(name, "true")) return .{ .boolean = true }; + if (eql(name, "false")) return .{ .boolean = false }; + if (eql(name, "lang")) return .{ .boolean = false }; + + // -- Number -- + if (eql(name, "number")) return .{ .number = try numberFn(arena, args, ctx) }; + if (eql(name, "sum")) return .{ .number = try sumFn(arena, args) }; + if (eql(name, "floor")) return 
.{ .number = if (args.len == 0) std.math.nan(f64) else std.math.floor(try Result.toNumber(arena, args[0])) }; + if (eql(name, "ceiling")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.ceil(try Result.toNumber(arena, args[0])) }; + if (eql(name, "round")) return .{ .number = if (args.len == 0) std.math.nan(f64) else roundHalfToPosInf(try Result.toNumber(arena, args[0])) }; + + return error.UnknownFunction; +} + +inline fn eql(a: []const u8, b: []const u8) bool { + return std.mem.eql(u8, a, b); +} + +// ----- node-set fns ----- + +fn countFn(args: []const Result.Result) f64 { + if (args.len == 0 or args[0] != .node_set) return 0; + return @floatFromInt(args[0].node_set.len); +} + +fn idFn(arena: Allocator, frame: *Frame, args: []const Result.Result, ctx: *Node) Error!Result.Result { + if (args.len == 0) return .{ .node_set = &.{} }; + + // Polyfill: node-set arg → join `stringVal(n)` of each by ' '. Scalar + // arg → `toStr`. Then split on whitespace and look up each token. + const id_str: []const u8 = blk: { + if (args[0] == .node_set) { + var buf = std.Io.Writer.Allocating.init(arena); + for (args[0].node_set, 0..) |n, i| { + if (i > 0) try buf.writer.writeByte(' '); + const sv = try Result.stringValueOf(arena, n); + try buf.writer.writeAll(sv); + } + break :blk buf.written(); + } + break :blk try Result.toString(arena, args[0]); + }; + + // `ctx.ownerDocument || ctx` — document nodes own themselves. 
+ const doc = ctx.ownerDocument(frame) orelse (ctx.is(Document) orelse return .{ .node_set = &.{} }); + + var seen: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; + var it = std.mem.tokenizeAny(u8, id_str, &std.ascii.whitespace); + while (it.next()) |tok| { + if (doc.getElementById(tok, frame)) |el| { + try seen.put(arena, el.asNode(), {}); + } + } + return .{ .node_set = seen.keys() }; +} + +fn localNameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { + const node = firstNodeOrCtx(args, ctx) orelse return ""; + // For Element, `getLocalName` returns a slice into `_tag_name` + // (lowercase, namespace-prefix stripped) — lifetime exceeds the + // per-evaluation arena, so we borrow instead of duping. + if (node.is(Element)) |el| return el.getLocalName(); + var buf: [256]u8 = undefined; + return std.ascii.allocLowerString(arena, node.getNodeName(&buf)); +} + +fn nameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { + const node = firstNodeOrCtx(args, ctx) orelse return ""; + // Diverges from `local-name` only on namespaced elements: `name` + // keeps the prefix (`ns:foo`), `local-name` strips it (`foo`). 
+    if (node.is(Element)) |el| return el.getTagNameLower();
+    var buf: [256]u8 = undefined;
+    return std.ascii.allocLowerString(arena, node.getNodeName(&buf));
+}
+
+/// Picks the node a node-oriented function operates on: the context node
+/// when no argument was passed, otherwise the first node of the first
+/// argument. Returns null when that argument is not a node-set or is an
+/// empty node-set.
+fn firstNodeOrCtx(args: []const Result.Result, ctx: *Node) ?*Node {
+    if (args.len == 0) return ctx;
+    if (args[0] != .node_set) return null;
+    if (args[0].node_set.len == 0) return null;
+    return args[0].node_set[0];
+}
+
+// ----- string fns -----
+
+/// `string()`: with no argument, the string-value of the context node;
+/// otherwise the first argument converted with `Result.toString`.
+fn stringFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 {
+    if (args.len == 0) return try Result.stringValueOf(arena, ctx);
+    return try Result.toString(arena, args[0]);
+}
+
+/// `concat()`: string-converts every argument and joins them in order.
+/// The result is built with an allocating writer backed by `arena`.
+fn concatFn(arena: Allocator, args: []const Result.Result) Error![]const u8 {
+    var buf = std.Io.Writer.Allocating.init(arena);
+    for (args) |a| {
+        const s = try Result.toString(arena, a);
+        try buf.writer.writeAll(s);
+    }
+    return buf.written();
+}
+
+/// `starts-with(s1, s2)`. Returns false when fewer than two arguments are
+/// given. An empty `s2` is a prefix of every string.
+fn startsWithFn(arena: Allocator, args: []const Result.Result) Error!bool {
+    if (args.len < 2) return false;
+    const s1 = try Result.toString(arena, args[0]);
+    const s2 = try Result.toString(arena, args[1]);
+    return std.mem.startsWith(u8, s1, s2);
+}
+
+/// `contains(s1, s2)`. Returns false when fewer than two arguments are
+/// given. An empty `s2` is contained in every string.
+fn containsFn(arena: Allocator, args: []const Result.Result) Error!bool {
+    if (args.len < 2) return false;
+    const s1 = try Result.toString(arena, args[0]);
+    const s2 = try Result.toString(arena, args[1]);
+    return std.mem.indexOf(u8, s1, s2) != null;
+}
+
+/// `substring-before(s1, s2)`: the part of `s1` before the first
+/// occurrence of `s2`, or "" when `s2` does not occur or fewer than two
+/// arguments are given. The result is a slice of the converted `s1`.
+fn substringBeforeFn(arena: Allocator, args: []const Result.Result) Error![]const u8 {
+    if (args.len < 2) return "";
+    const s1 = try Result.toString(arena, args[0]);
+    const s2 = try Result.toString(arena, args[1]);
+    if (std.mem.indexOf(u8, s1, s2)) |idx| {
+        return s1[0..idx];
+    }
+    return "";
+}
+
+/// `substring-after(s1, s2)`: the part of `s1` after the first occurrence
+/// of `s2`, or "" when `s2` does not occur or fewer than two arguments
+/// are given. The result is a slice of the converted `s1`.
+fn substringAfterFn(arena: Allocator, args: []const Result.Result) Error![]const u8 {
+    if (args.len < 2) return "";
+    const s1 = try Result.toString(arena, args[0]);
+    const s2 = try Result.toString(arena, args[1]);
+    if (std.mem.indexOf(u8, s1, s2)) |idx| {
+        return s1[idx + s2.len ..];
+    }
+    return "";
+}
+
+/// `substring(s, start[, length])` with XPath's 1-based positions.
+///
+/// `start` and `length` are converted to numbers and rounded with
+/// `roundHalfToPosInf`. A NaN start, a NaN length, or a NaN end position
+/// yields "". Offsets index bytes of the converted string, so the result
+/// is a slice of it. Returns "" when fewer than two arguments are given.
+fn substringFn(arena: Allocator, args: []const Result.Result) Error![]const u8 {
+    if (args.len < 2) return "";
+    const s = try Result.toString(arena, args[0]);
+    const start_raw = try Result.toNumber(arena, args[1]);
+    if (std.math.isNan(start_raw)) return "";
+    const start = roundHalfToPosInf(start_raw);
+
+    const s_len: f64 = @floatFromInt(s.len);
+    if (args.len >= 3) {
+        const len_raw = try Result.toNumber(arena, args[2]);
+        if (std.math.isNan(len_raw)) return "";
+        const len = roundHalfToPosInf(len_raw);
+        const sum = start - 1 + len;
+        // -inf + inf is NaN; @intFromFloat(NaN) is illegal behavior.
+        if (std.math.isNan(sum)) return "";
+        // Clamp to [0, s.len] before converting, so infinities never reach
+        // @intFromFloat.
+        const si_f = @max(start - 1, 0);
+        const ei_f = @min(sum, s_len);
+        if (si_f >= ei_f) return "";
+        const si: usize = @intFromFloat(si_f);
+        const ei: usize = @intFromFloat(ei_f);
+        return s[si..ei];
+    }
+
+    const si_f = @max(start - 1, 0);
+    if (si_f >= s_len) return "";
+    const si: usize = @intFromFloat(si_f);
+    return s[si..];
+}
+
+/// `string-length()`: length of the argument's string conversion, or of
+/// the context node's string-value when no argument is given.
+fn stringLengthFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error!f64 {
+    const s = if (args.len == 0)
+        try Result.stringValueOf(arena, ctx)
+    else
+        try Result.toString(arena, args[0]);
+    // Polyfill returns UTF-16 code units; we return UTF-8 bytes. They
+    // agree on ASCII (the gem's 91-case battery is ASCII-only). See
+    // .claude/skills/xpath-port/NOTES.md for the divergence rationale.
+    return @floatFromInt(s.len);
+}
+
+/// `normalize-space()`: trims leading and trailing ASCII whitespace and
+/// collapses every inner run of ASCII whitespace to a single space.
+/// Operates on the argument's string conversion, or on the context node's
+/// string-value when no argument is given.
+fn normalizeSpaceFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 {
+    const s = if (args.len == 0)
+        try Result.stringValueOf(arena, ctx)
+    else
+        try Result.toString(arena, args[0]);
+
+    const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace);
+    if (trimmed.len == 0) return "";
+
+    var buf = std.Io.Writer.Allocating.init(arena);
+    var prev_space = false;
+    for (trimmed) |c| {
+        if (std.ascii.isWhitespace(c)) {
+            if (!prev_space) try buf.writer.writeByte(' ');
+            prev_space = true;
+        } else {
+            try buf.writer.writeByte(c);
+            prev_space = false;
+        }
+    }
+    return buf.written();
+}
+
+/// `translate(s, from, to)`, byte by byte: a byte of `s` found in `from`
+/// is replaced by the byte at the same index in `to`, or dropped when
+/// `to` is too short; every other byte is copied. The first occurrence in
+/// `from` wins. Returns "" when fewer than three arguments are given.
+fn translateFn(arena: Allocator, args: []const Result.Result) Error![]const u8 {
+    if (args.len < 3) return "";
+    const s = try Result.toString(arena, args[0]);
+    const from = try Result.toString(arena, args[1]);
+    const to = try Result.toString(arena, args[2]);
+
+    var buf = std.Io.Writer.Allocating.init(arena);
+    for (s) |c| {
+        if (std.mem.indexOfScalar(u8, from, c)) |idx| {
+            // Chars in `from` past `to.len` are deleted (no copy).
+            if (idx < to.len) try buf.writer.writeByte(to[idx]);
+        } else {
+            try buf.writer.writeByte(c);
+        }
+    }
+    return buf.written();
+}
+
+// ----- number fns -----
+
+/// `number()`: with no argument, the context node's string-value parsed
+/// as a number; otherwise the first argument converted with
+/// `Result.toNumber`.
+fn numberFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error!f64 {
+    if (args.len == 0) {
+        const sv = try Result.stringValueOf(arena, ctx);
+        return Result.stringToNumber(sv);
+    }
+    return try Result.toNumber(arena, args[0]);
+}
+
+/// `sum()`: adds the numeric value of each node's string-value. Returns
+/// NaN when there is no argument or the argument is not a node-set; an
+/// empty node-set sums to 0.
+fn sumFn(arena: Allocator, args: []const Result.Result) Error!f64 {
+    if (args.len == 0 or args[0] != .node_set) return std.math.nan(f64);
+    var total: f64 = 0;
+    for (args[0].node_set) |n| {
+        const sv = try Result.stringValueOf(arena, n);
+        total += Result.stringToNumber(sv);
+    }
+    return total;
+}
+
+/// Round to the nearest integer, ties toward positive infinity. Matches
+/// JS `Math.round` (the polyfill calls it for both `round()` and
+/// `substring()`):
+///   round(0.5) = 1    round(-0.5) = 0
+///   round(1.5) = 2    round(-1.5) = -1
+/// Diverges from Zig's `@round` (away from zero): `@round(-0.5) = -1`.
+///
+/// NaN and ±Infinity are returned unchanged. A zero result is always +0.
+fn roundHalfToPosInf(n: f64) f64 {
+    // isFinite is false for NaN too, so this one test covers NaN and ±inf.
+    if (!std.math.isFinite(n)) return n;
+
+    // Decide on the fractional part rather than computing floor(n + 0.5):
+    // the addition itself rounds, which turned 0.49999999999999994 into 1
+    // and moved odd integers of magnitude >= 2^52 to a neighbouring even
+    // integer.
+    const floored = @floor(n);
+    const rounded = if (n - floored >= 0.5) floored + 1 else floored;
+
+    // Keep the long-standing +0 result for every input that rounds to zero.
+    // NOTE(review): XPath 1.0 and Math.round give -0 for -0 and [-0.5, 0);
+    // confirm number-to-string handles -0 before switching to that.
+    if (rounded == 0) return 0;
+    return rounded;
+}
+
+// ---------------------------------------------------------------------
+// Tests — pure-logic only. Functions that need a real DOM (id, name,
+// local-name, string with element ctx, sum, count of node-set, etc.)
+// are exercised via Phase 9 HTML fixtures in tests/xpath/.
+// ---------------------------------------------------------------------
+
+const testing = std.testing;
+const Tokenizer = @import("Tokenizer.zig");
+const Parser = @import("Parser.zig");
+const Evaluator = @import("Evaluator.zig");
+
+/// Test helper: parses `src` and evaluates it with placeholder frame and
+/// context-node pointers. Only valid for expressions that never touch them.
+fn evalScalar(a: Allocator, src: []const u8) !Result.Result {
+    const expr = try Parser.parse(a, src);
+    // Synthetic Frame/Node pointers — the public `evaluate` entry only
+    // touches the Frame for path/axis evaluation. Pure-scalar expressions
+    // (arithmetic, function calls returning scalars) never deref it.
+    return Evaluator.evaluate(a, @ptrFromInt(0x1000), expr, @ptrFromInt(0x2000));
+}
+
+test "Functions: count() of non-node-set returns 0" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const r = try evalScalar(arena.allocator(), "count('hello')");
+    try testing.expect(r == .number);
+    try testing.expectEqual(@as(f64, 0), r.number);
+}
+
+test "Functions: string() on scalar coerces" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "string(42)", "42" },
+        .{ "string(3.14)", "3.14" },
+        .{ "string(true())", "true" },
+        .{ "string(false())", "false" },
+        .{ "string('hello')", "hello" },
+        .{ "string(0)", "0" },
+        .{ "string(-1)", "-1" },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .string);
+        try testing.expectEqualStrings(case[1], r.string);
+    }
+}
+
+test "Functions: concat() variadic" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "concat('a', 'b')", "ab" },
+        .{ "concat('a', 'b', 'c')", "abc" },
+        .{ "concat('foo', '-', 'bar', '-', 'baz')", "foo-bar-baz" },
+        .{ "concat('x', 1, 'y')", "x1y" },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .string);
+        try testing.expectEqualStrings(case[1], r.string);
+    }
+}
+
+test "Functions: starts-with / contains" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "starts-with('hello', 'he')", true },
+        .{ "starts-with('hello', 'el')", false },
+        .{ "starts-with('hello', '')", true },
+        .{ "contains('hello world', 'wor')", true },
+        .{ "contains('hello', 'xyz')", false },
+        .{ "contains('hello', '')", true },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .boolean);
+        try testing.expectEqual(case[1], r.boolean);
+    }
+}
+
+test "Functions: substring-before / substring-after" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "substring-before('1999/04/01', '/')", "1999" },
+        .{ "substring-before('hello', 'xyz')", "" },
+        .{ "substring-after('1999/04/01', '/')", "04/01" },
+        .{ "substring-after('hello', 'xyz')", "" },
+        .{ "substring-after('hello', '')", "hello" },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .string);
+        try testing.expectEqualStrings(case[1], r.string);
+    }
+}
+
+test "Functions: substring() — XPath 1-based, rounding, NaN handling" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "substring('12345', 2, 3)", "234" },
+        .{ "substring('12345', 2)", "2345" },
+        // XPath spec example: round(1.5) = 2 → start at pos 2, len 2.
+        .{ "substring('12345', 1.5, 2.6)", "234" },
+        // start = 0: si = max(-1, 0) = 0, ei = min(0 - 1 + 3, len) = 2.
+        .{ "substring('12345', 0, 3)", "12" },
+        // Negative start clamps to 0.
+        .{ "substring('12345', -3, 7)", "123" },
+        // NaN start.
+        .{ "substring('12345', 'foo')", "" },
+        // NaN length.
+        .{ "substring('12345', 1, 'foo')", "" },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .string);
+        try testing.expectEqualStrings(case[1], r.string);
+    }
+}
+
+test "Functions: string-length on scalar arg" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "string-length('hello')", 5 },
+        .{ "string-length('')", 0 },
+        .{ "string-length('a b c')", 5 },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .number);
+        try testing.expectEqual(@as(f64, case[1]), r.number);
+    }
+}
+
+test "Functions: normalize-space" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "normalize-space(' hello world ')", "hello world" },
+        .{ "normalize-space('hello')", "hello" },
+        .{ "normalize-space('')", "" },
+        .{ "normalize-space(' ')", "" },
+        .{ "normalize-space('a\tb\nc')", "a b c" },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .string);
+        try testing.expectEqualStrings(case[1], r.string);
+    }
+}
+
+test "Functions: translate" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        // Standard XPath spec example.
+        .{ "translate('bar', 'abc', 'ABC')", "BAr" },
+        // Char in `from` past `to.len` is deleted.
+        .{ "translate('--aaa--', 'abc-', 'ABC')", "AAA" },
+        .{ "translate('hello', '', '')", "hello" },
+        // Identity.
+        .{ "translate('abc', 'abc', 'abc')", "abc" },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .string);
+        try testing.expectEqualStrings(case[1], r.string);
+    }
+}
+
+test "Functions: boolean / not / true / false / lang" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "true()", true },
+        .{ "false()", false },
+        .{ "not(true())", false },
+        .{ "not(false())", true },
+        .{ "boolean(1)", true },
+        .{ "boolean(0)", false },
+        .{ "boolean('')", false },
+        .{ "boolean('x')", true },
+        // lang is a stub — always false.
+        .{ "lang('en')", false },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .boolean);
+        try testing.expectEqual(case[1], r.boolean);
+    }
+}
+
+test "Functions: number() on scalar arg" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    {
+        const r = try evalScalar(a, "number('42')");
+        try testing.expectEqual(@as(f64, 42), r.number);
+    }
+    {
+        const r = try evalScalar(a, "number(true())");
+        try testing.expectEqual(@as(f64, 1), r.number);
+    }
+    {
+        const r = try evalScalar(a, "number(false())");
+        try testing.expectEqual(@as(f64, 0), r.number);
+    }
+    {
+        const r = try evalScalar(a, "number('foo')");
+        try testing.expect(std.math.isNan(r.number));
+    }
+}
+
+test "Functions: floor / ceiling / round" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    inline for (.{
+        .{ "floor(1.5)", 1 },
+        .{ "floor(-1.5)", -2 },
+        .{ "floor(0)", 0 },
+        .{ "ceiling(1.5)", 2 },
+        .{ "ceiling(-1.5)", -1 },
+        .{ "ceiling(0)", 0 },
+        // Half-toward-positive-infinity (JS Math.round behavior).
+        .{ "round(0.5)", 1 },
+        .{ "round(-0.5)", 0 },
+        .{ "round(1.5)", 2 },
+        .{ "round(-1.5)", -1 },
+        .{ "round(2.5)", 3 },
+    }) |case| {
+        const r = try evalScalar(a, case[0]);
+        try testing.expect(r == .number);
+        try testing.expectEqual(@as(f64, case[1]), r.number);
+    }
+}
+
+test "Functions: round/floor/ceiling propagate NaN and Infinity" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    {
+        const r = try evalScalar(a, "round(1 div 0)"); // +Infinity
+        try testing.expect(std.math.isPositiveInf(r.number));
+    }
+    {
+        const r = try evalScalar(a, "round(0 div 0)"); // NaN
+        try testing.expect(std.math.isNan(r.number));
+    }
+    {
+        const r = try evalScalar(a, "floor(0 div 0)");
+        try testing.expect(std.math.isNan(r.number));
+    }
+    {
+        const r = try evalScalar(a, "ceiling(0 div 0)");
+        try testing.expect(std.math.isNan(r.number));
+    }
+}
+
+test "Functions: sum / count on non-node-set defaults" {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    defer arena.deinit();
+    const a = arena.allocator();
+
+    {
+        const r = try evalScalar(a, "sum('hello')");
+        try testing.expect(std.math.isNan(r.number));
+    }
+    {
+        const r = try evalScalar(a, "count('hello')");
+        try testing.expectEqual(@as(f64, 0), r.number);
+    }
+}
+
+test "Functions: roundHalfToPosInf" {
+    try testing.expectEqual(@as(f64, 1), roundHalfToPosInf(0.5));
+    try testing.expectEqual(@as(f64, 0), roundHalfToPosInf(-0.5));
+    try testing.expectEqual(@as(f64, 2), roundHalfToPosInf(1.5));
+    try testing.expectEqual(@as(f64, -1), roundHalfToPosInf(-1.5));
+    try testing.expectEqual(@as(f64, 3), roundHalfToPosInf(2.5));
+    try testing.expect(std.math.isNan(roundHalfToPosInf(std.math.nan(f64))));
+    try testing.expect(std.math.isPositiveInf(roundHalfToPosInf(std.math.inf(f64))));
+    try testing.expect(std.math.isNegativeInf(roundHalfToPosInf(-std.math.inf(f64))));
+    // Regressions: `floor(n + 0.5)` lost precision on these inputs.
+    try testing.expectEqual(@as(f64, 0), roundHalfToPosInf(0.49999999999999994));
+    try testing.expectEqual(@as(f64, 4503599627370497), roundHalfToPosInf(4503599627370497));
+    try testing.expectEqual(@as(f64, -4503599627370497), roundHalfToPosInf(-4503599627370497));
+    // A zero result stays +0.
+    try testing.expect(!std.math.signbit(roundHalfToPosInf(-0.25)));
+}
diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig
new file mode 100644
index 00000000..88d25b26
--- /dev/null
+++ b/src/browser/xpath/Parser.zig
@@ -0,0 +1,923 @@
+// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
+//
+// Francis Bouvier
+// Pierre Tachoire
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as
+// published by the Free Software Foundation, either version 3 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+//! XPath 1.0 expression parser.
+//!
+//! Mirrors the polyfill `Parser.prototype.*` chain in capybara-lightpanda
+//! (lib/capybara/lightpanda/javascripts/index.js): recursive descent over
+//! a fully-tokenized stream, producing an `Ast.Expr` tree allocated on
+//! the caller's arena. The AST borrows string/name slices from `input`
+//! and is valid for as long as the arena and input outlive it.
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+const Tokenizer = @import("Tokenizer.zig");
+const Token = Tokenizer.Token;
+const Ast = @import("Ast.zig");
+
+const Parser = @This();
+
+pub const Error = error{
+    OutOfMemory,
+    UnexpectedToken,
+    ExpectedNodeTest,
+    ExpectedPrimaryExpr,
+};
+
+/// Maximum nesting of parenthesized expressions, predicates and function
+/// arguments. Each level re-enters `parseExpr` through the whole precedence
+/// chain, so without a cap a page-supplied expression such as a long run
+/// of `(` overflows the native stack.
+const max_nesting_depth: usize = 256;
+
+arena: Allocator,
+tokens: []const Token,
+pos: usize = 0,
+/// Number of `parseExpr` calls currently on the stack.
+depth: usize = 0,
+
+/// Tokenizes all of `input`, then parses it as a single XPath expression.
+///
+/// The returned tree and the token list are allocated from `arena`.
+/// Fails with `error.UnexpectedToken` when tokens remain after a complete
+/// expression or when nesting exceeds `max_nesting_depth`.
+pub fn parse(arena: Allocator, input: []const u8) Error!*Ast.Expr {
+    var token_list: std.ArrayList(Token) = .empty;
+    // Reserve one slot per four input bytes (minimum 8): enough for typical
+    // XPath, so the list rarely regrows. `append` still grows it when the
+    // estimate is too low.
+    try token_list.ensureTotalCapacity(arena, @max(8, input.len / 4));
+    var tokenizer = Tokenizer{ .input = input };
+    while (true) {
+        const tok = tokenizer.next();
+        try token_list.append(arena, tok);
+        if (tok == .eof) break;
+    }
+
+    var parser = Parser{
+        .arena = arena,
+        .tokens = token_list.items,
+    };
+    const expr = try parser.parseExpr();
+    if (parser.peek() != .eof) return error.UnexpectedToken;
+    return expr;
+}
+
+// --- token cursor helpers ---
+
+/// Current token. The stream always ends with `.eof`, and no caller
+/// advances past it, so `pos` stays in bounds.
+fn peek(self: *const Parser) Token {
+    return self.tokens[self.pos];
+}
+
+/// Token `offset` positions ahead of the cursor, or `.eof` past the end.
+fn lookahead(self: *const Parser, offset: usize) Token {
+    const idx = self.pos + offset;
+    if (idx >= self.tokens.len) return .eof;
+    return self.tokens[idx];
+}
+
+/// Returns the current token and moves the cursor one token forward.
+fn advance(self: *Parser) Token {
+    const tok = self.tokens[self.pos];
+    self.pos += 1;
+    return tok;
+}
+
+/// True when the current token has the given tag.
+fn at(self: *const Parser, tag: std.meta.Tag(Token)) bool {
+    return self.peek() == tag;
+}
+
+/// Consumes the current token if it has the given tag; reports whether it did.
+fn match(self: *Parser, tag: std.meta.Tag(Token)) bool {
+    if (self.at(tag)) {
+        _ = self.advance();
+        return true;
+    }
+    return false;
+}
+
+/// Consumes and returns the current token, or fails with
+/// `error.UnexpectedToken` when its tag differs.
+fn expect(self: *Parser, tag: std.meta.Tag(Token)) Error!Token {
+    if (!self.at(tag)) return error.UnexpectedToken;
+    return self.advance();
+}
+
+/// Consumes a `.name` token whose text equals `keyword` (`or`, `and`);
+/// reports whether it did.
+fn matchKeyword(self: *Parser, keyword: []const u8) bool {
+    const tok = self.peek();
+    if (tok == .name and std.mem.eql(u8, tok.name, keyword)) {
+        _ = self.advance();
+        return true;
+    }
+    return false;
+}
+
+/// Allocates an AST node on the arena and initializes it with `value`.
+fn makeExpr(self: *Parser, value: Ast.Expr) Error!*Ast.Expr {
+    const expr = try self.arena.create(Ast.Expr);
+    expr.* = value;
+    return expr;
+}
+
+/// Allocates a binary-operator node.
+fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr) Error!*Ast.Expr {
+    return try self.makeExpr(.{ .binop = .{ .op = op, .left = left, .right = right } });
+}
+
+// --- operator-precedence chain ---
+//
+// Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path
+
+/// Entry point of the precedence chain; re-entered for every parenthesized
+/// expression, predicate and function argument. Too-deep nesting is
+/// reported as `error.UnexpectedToken` so the public error set is unchanged.
+fn parseExpr(self: *Parser) Error!*Ast.Expr {
+    if (self.depth >= max_nesting_depth) return error.UnexpectedToken;
+    self.depth += 1;
+    defer self.depth -= 1;
+    return self.parseOrExpr();
+}
+
+/// `a or b or …`, left-associative.
+fn parseOrExpr(self: *Parser) Error!*Ast.Expr {
+    var left = try self.parseAndExpr();
+    while (self.matchKeyword("or")) {
+        const right = try self.parseAndExpr();
+        left = try self.makeBinop(.or_, left, right);
+    }
+    return left;
+}
+
+/// `a and b and …`, left-associative; binds tighter than `or`.
+fn parseAndExpr(self: *Parser) Error!*Ast.Expr {
+    var left = try self.parseEqualityExpr();
+    while (self.matchKeyword("and")) {
+        const right = try self.parseEqualityExpr();
+        left = try self.makeBinop(.and_, left, right);
+    }
+    return left;
+}
+
+/// `=` and `!=`, left-associative.
+fn parseEqualityExpr(self: *Parser) Error!*Ast.Expr {
+    var left = try self.parseRelationalExpr();
+    while (equalityOp(self.peek())) |op| {
+        _ = self.advance();
+        const right = try self.parseRelationalExpr();
+        left = try self.makeBinop(op, left, right);
+    }
+    return left;
+}
+
+/// `<`, `>`, `<=`, `>=`, left-associative.
+fn parseRelationalExpr(self: *Parser) Error!*Ast.Expr {
+    var left = try self.parseAdditiveExpr();
+    while (relationalOp(self.peek())) |op| {
+        _ = self.advance();
+        const right = try self.parseAdditiveExpr();
+        left = try self.makeBinop(op, left, right);
+    }
+    return left;
+}
+
+/// Binary `+` and `-`, left-associative.
+fn parseAdditiveExpr(self: *Parser) Error!*Ast.Expr {
+    var left = try self.parseMultExpr();
+    while (additiveOp(self.peek())) |op| {
+        _ = self.advance();
+        const right = try self.parseMultExpr();
+        left = try self.makeBinop(op, left, right);
+    }
+    return left;
+}
+
+// After a complete unary expression, `*` is multiply; `div`/`mod` are
+// operator-position keywords (tokenized as Name).
+fn parseMultExpr(self: *Parser) Error!*Ast.Expr {
+    var left = try self.parseUnaryExpr();
+    while (multOp(self.peek())) |op| {
+        _ = self.advance();
+        const right = try self.parseUnaryExpr();
+        left = try self.makeBinop(op, left, right);
+    }
+    return left;
+}
+
+/// Zero or more leading `-` followed by a union expression.
+///
+/// The minus signs are consumed in a loop, not by recursion, so a long run
+/// of `-` in a page-supplied expression cannot overflow the stack. Only the
+/// parity of the run is kept: an odd count yields `neg(x)`, an even
+/// non-zero count yields `neg(neg(x))`. Floating-point negation only flips
+/// the sign bit, so this evaluates identically; the pair is kept for even
+/// counts because it still coerces the operand to a number.
+fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr {
+    var minus_count: usize = 0;
+    while (self.match(.minus)) minus_count += 1;
+
+    var expr = try self.parseUnionExpr();
+    if (minus_count == 0) return expr;
+
+    const wraps: usize = if (minus_count % 2 == 1) 1 else 2;
+    for (0..wraps) |_| {
+        expr = try self.makeExpr(.{ .neg = expr });
+    }
+    return expr;
+}
+
+/// `a | b | …`, left-associative.
+fn parseUnionExpr(self: *Parser) Error!*Ast.Expr {
+    var left = try self.parsePathExpr();
+    while (self.match(.pipe)) {
+        const right = try self.parsePathExpr();
+        left = try self.makeBinop(.union_, left, right);
+    }
+    return left;
+}
+
+// --- path expressions ---
+
+/// Parses an absolute path, a filter expression (primary expression with
+/// optional predicates and an optional `/` or `//` tail), or a relative
+/// path, chosen from the current token and one token of lookahead.
+fn parsePathExpr(self: *Parser) Error!*Ast.Expr {
+    const t = self.peek();
+
+    if (t == .slash or t == .double_slash) {
+        return self.parseAbsPath();
+    }
+
+    // Filter-vs-relative-path disambiguation: a primary expression
+    // starts with `(`, string, number, `$`, or a `name(` where the
+    // name is *not* a node-type test (`node`/`text`/`comment`/`processing-instruction`).
+    const is_filter = switch (t) {
+        .lparen, .string, .number, .dollar => true,
+        .name => |name| self.lookahead(1) == .lparen and !isNodeTypeName(name),
+        else => false,
+    };
+
+    if (is_filter) {
+        var primary = try self.parsePrimaryExpr();
+        // Each predicate wraps the previous node, so `e[1][2]` nests.
+        while (self.match(.lbracket)) {
+            const pred = try self.parseExpr();
+            _ = try self.expect(.rbracket);
+            primary = try self.makeExpr(.{ .filter = .{ .expr = primary, .predicate = pred } });
+        }
+        if (self.peek() == .slash or self.peek() == .double_slash) {
+            const dsl = self.advance() == .double_slash;
+            var steps: std.ArrayList(Ast.Step) = .empty;
+            if (dsl) try steps.append(self.arena, descendantOrSelfStep());
+            try self.parseRelStepsInto(&steps);
+            return try self.makeExpr(.{ .filter_path = .{
+                .filter = primary,
+                .steps = steps.items,
+            } });
+        }
+        return primary;
+    }
+
+    return self.parseRelPath();
+}
+
+/// Parses a path starting with `/` or `//`. A leading `//` contributes a
+/// `descendant-or-self::node()` step and requires at least one more step.
+fn parseAbsPath(self: *Parser) Error!*Ast.Expr {
+    var steps: std.ArrayList(Ast.Step) = .empty;
+    if (self.match(.double_slash)) {
+        try steps.append(self.arena, descendantOrSelfStep());
+        try self.parseRelStepsInto(&steps);
+    } else {
+        _ = try self.expect(.slash);
+        // `/` alone is the document root — no step required.
+        if (self.canStartStep()) try self.parseRelStepsInto(&steps);
+    }
+    return try self.makeExpr(.{ .path = .{
+        .absolute = true,
+        .steps = steps.items,
+    } });
+}
+
+/// Parses a relative location path of one or more steps.
+fn parseRelPath(self: *Parser) Error!*Ast.Expr {
+    var steps: std.ArrayList(Ast.Step) = .empty;
+    try self.parseRelStepsInto(&steps);
+    return try self.makeExpr(.{ .path = .{
+        .absolute = false,
+        .steps = steps.items,
+    } });
+}
+
+/// Appends one step, then further steps for each `/` or `//` separator.
+/// A `//` separator also inserts a `descendant-or-self::node()` step.
+fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(Ast.Step)) Error!void {
+    try steps.append(self.arena, try self.parseStep());
+    while (self.peek() == .slash or self.peek() == .double_slash) {
+        if (self.advance() == .double_slash) {
+            try steps.append(self.arena, descendantOrSelfStep());
+        }
+        try steps.append(self.arena, try self.parseStep());
+    }
+}
+
+/// True when the current token can begin a location step.
+fn canStartStep(self: *const Parser) bool {
+    return switch (self.peek()) {
+        .name, .star, .dot, .double_dot, .at => true,
+        else => false,
+    };
+}
+
+/// Parses one location step: `.`, `..`, or an optional axis (`@` or
+/// `name::`) followed by a node test and zero or more predicates.
+fn parseStep(self: *Parser) Error!Ast.Step {
+    // Abbreviated steps `.` and `..` carry no axis, node-test, or
+    // predicates — predicates after `.` are a parse error per polyfill.
+    if (self.match(.dot)) return abbreviatedStep(.self);
+    if (self.match(.double_dot)) return abbreviatedStep(.parent);
+
+    var axis: Ast.Axis = .child;
+    if (self.match(.at)) {
+        axis = .attribute;
+    } else if (self.peek() == .name and self.lookahead(1) == .double_colon) {
+        const axis_name = self.advance().name;
+        _ = self.advance(); // `::`
+        axis = parseAxisName(axis_name);
+    }
+
+    const node_test = try self.parseNodeTest();
+
+    var preds: std.ArrayList(*Ast.Expr) = .empty;
+    while (self.match(.lbracket)) {
+        const pred = try self.parseExpr();
+        _ = try self.expect(.rbracket);
+        try preds.append(self.arena, pred);
+    }
+
+    return .{ .axis = axis, .node_test = node_test, .predicates = preds.items };
+}
+
+/// Parses `*`, a node-type test such as `text()`, or a name test. A
+/// node-type name that is not followed by `(` is an ordinary name test.
+fn parseNodeTest(self: *Parser) Error!Ast.NodeTest {
+    if (self.match(.star)) return .{ .name = "*" };
+    if (self.peek() != .name) return error.ExpectedNodeTest;
+
+    const name = self.peek().name;
+    if (typeTestKind(name)) |type_test| {
+        if (self.lookahead(1) == .lparen) {
+            _ = self.advance(); // name
+            _ = self.advance(); // `(`
+            // `processing-instruction("target")` consumes the literal but ignores it (decision #3 stub).
+            if (type_test == .processing_instruction and self.peek() == .string) {
+                _ = self.advance();
+            }
+            _ = try self.expect(.rparen);
+            return .{ .type_test = type_test };
+        }
+    }
+    _ = self.advance();
+    return .{ .name = name };
+}
+
+/// Parses a string or number literal, a `$name` variable reference, a
+/// parenthesized expression (returned unwrapped), or a function call.
+fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr {
+    switch (self.peek()) {
+        .string => |s| {
+            _ = self.advance();
+            return try self.makeExpr(.{ .literal = s });
+        },
+        .number => |n| {
+            _ = self.advance();
+            return try self.makeExpr(.{ .number = n });
+        },
+        .dollar => {
+            _ = self.advance();
+            const name_tok = try self.expect(.name);
+            return try self.makeExpr(.{ .var_ref = name_tok.name });
+        },
+        .lparen => {
+            _ = self.advance();
+            const e = try self.parseExpr();
+            _ = try self.expect(.rparen);
+            return e;
+        },
+        .name => |name| {
+            _ = self.advance();
+            _ = try self.expect(.lparen);
+            var args: std.ArrayList(*Ast.Expr) = .empty;
+            if (self.peek() != .rparen) {
+                try args.append(self.arena, try self.parseExpr());
+                while (self.match(.comma)) {
+                    try args.append(self.arena, try self.parseExpr());
+                }
+            }
+            _ = try self.expect(.rparen);
+            return try self.makeExpr(.{ .fn_call = .{ .name = name, .args = args.items } });
+        },
+        else => return error.ExpectedPrimaryExpr,
+    }
+}
+
+// --- pure helpers ---
+
+/// Maps `=` / `!=` tokens to their operator kind; null for anything else.
+fn equalityOp(t: Token) ?Ast.BinOpKind {
+    return switch (t) {
+        .eq => .eq,
+        .neq => .neq,
+        else => null,
+    };
+}
+
+/// Maps `<`, `>`, `<=`, `>=` tokens to their operator kind; null otherwise.
+fn relationalOp(t: Token) ?Ast.BinOpKind {
+    return switch (t) {
+        .lt => .lt,
+        .gt => .gt,
+        .lte => .lte,
+        .gte => .gte,
+        else => null,
+    };
+}
+
+/// Maps `+` / `-` tokens to their operator kind; null otherwise.
+fn additiveOp(t: Token) ?Ast.BinOpKind {
+    return switch (t) {
+        .plus => .add,
+        .minus => .sub,
+        else => null,
+    };
+}
+
+/// Maps `*` and the names `div` / `mod` to their operator kind; null
+/// otherwise.
+fn multOp(t: Token) ?Ast.BinOpKind {
+    return switch (t) {
+        .star => .mul,
+        .name => |name| blk: {
+            if (std.mem.eql(u8, name, "div")) break :blk .div;
+            if (std.mem.eql(u8, name, "mod")) break :blk .mod;
+            break :blk null;
+        },
+        else => null,
+    };
+}
+
+/// The `descendant-or-self::node()` step that `//` abbreviates.
+fn descendantOrSelfStep() Ast.Step {
+    return .{
+        .axis = .descendant_or_self,
+        .node_test = .{ .type_test = .node },
+        .predicates = &.{},
+    };
+}
+
+/// A predicate-free `axis::node()` step, used for `.` and `..`.
+fn abbreviatedStep(axis: Ast.Axis) Ast.Step {
+    return .{
+        .axis = axis,
+        .node_test = .{ .type_test = .node },
+        .predicates = &.{},
+    };
+}
+
+/// True when `name` is one of the node-type test names.
+fn isNodeTypeName(name: []const u8) bool {
+    return typeTestKind(name) != null;
+}
+
+const type_test_lookup = std.StaticStringMap(Ast.TypeTest).initComptime(.{
+    .{ "node", .node },
+    .{ "text", .text },
+    .{ "comment", .comment },
+    .{ "processing-instruction", .processing_instruction },
+});
+
+/// Looks up the node-type test for `name`; null when it is not one.
+fn typeTestKind(name: []const u8) ?Ast.TypeTest {
+    return type_test_lookup.get(name);
+}
+
+const axis_lookup = std.StaticStringMap(Ast.Axis).initComptime(.{
+    .{ "child", .child },
+    .{ "descendant", .descendant },
+    .{ "descendant-or-self", .descendant_or_self },
+    .{ "self", .self },
+    .{ "parent", .parent },
+    .{ "ancestor", .ancestor },
+    .{ "ancestor-or-self", .ancestor_or_self },
+    .{ "following-sibling", .following_sibling },
+    .{ "preceding-sibling", .preceding_sibling },
+    .{ "following", .following },
+    .{ "preceding", .preceding },
+    .{ "attribute", .attribute },
+    .{ "namespace", .namespace },
+});
+
+/// Maps an axis name to its `Ast.Axis`; unrecognized names map to
+/// `.unknown` instead of failing the parse.
+fn parseAxisName(name: []const u8) Ast.Axis {
+    return axis_lookup.get(name) orelse .unknown;
+}
+
+// ---------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------
+
+const testing = std.testing;
+
+/// Test helper: parses `input` on a fresh arena. The caller must call
+/// `arena.deinit()` on the returned fixture.
+fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *Ast.Expr } {
+    var arena = std.heap.ArenaAllocator.init(testing.allocator);
+    errdefer arena.deinit();
+    const expr = try parse(arena.allocator(), input);
+    return .{ .arena = arena, .expr = expr };
+}
+
+test "XPath.Parser: number literal" {
+    var fx = try parseFixture("42");
+    defer fx.arena.deinit();
+    try testing.expectEqual(@as(f64, 42), fx.expr.number);
+}
+
+test "XPath.Parser: string literal" {
+    var fx = try parseFixture("'hello'");
+    defer fx.arena.deinit();
+    try testing.expectEqualStrings("hello", fx.expr.literal);
+}
+
+test "XPath.Parser: variable reference strips $" {
+    var fx = try parseFixture("$x");
+    defer fx.arena.deinit();
+    try testing.expectEqualStrings("x", fx.expr.var_ref);
+}
+
+test "XPath.Parser: parenthesized expression unwraps" {
+    var fx = try parseFixture("(42)");
+    defer fx.arena.deinit();
+    try testing.expectEqual(@as(f64, 42), fx.expr.number);
+}
+
+test "XPath.Parser: function call with no args" {
+    var fx = try parseFixture("position()");
+    defer fx.arena.deinit();
+    try testing.expectEqualStrings("position", fx.expr.fn_call.name);
+    try testing.expectEqual(@as(usize, 0), fx.expr.fn_call.args.len);
+}
+
+test "XPath.Parser: function call with args" {
+    var fx = try parseFixture("substring('abc', 2, 1)");
+    defer fx.arena.deinit();
+    const fc = fx.expr.fn_call;
+    try testing.expectEqualStrings("substring", fc.name);
+    try testing.expectEqual(@as(usize, 3), fc.args.len);
+    try testing.expectEqualStrings("abc", fc.args[0].literal);
+    try testing.expectEqual(@as(f64, 2), fc.args[1].number);
+    try testing.expectEqual(@as(f64, 1), fc.args[2].number);
+}
+
+test "XPath.Parser: arithmetic precedence — mul binds tighter than add" {
+    var fx = try parseFixture("1 + 2 * 3");
+    defer fx.arena.deinit();
+    // Expected AST: add(1, mul(2, 3))
+    const top = fx.expr.binop;
+    try testing.expectEqual(Ast.BinOpKind.add, top.op);
+    try testing.expectEqual(@as(f64, 1), top.left.number);
+    const mul = top.right.binop;
+    try testing.expectEqual(Ast.BinOpKind.mul, mul.op);
+    try testing.expectEqual(@as(f64, 2), mul.left.number);
+    try testing.expectEqual(@as(f64, 3), mul.right.number);
+}
+
+test "XPath.Parser: arithmetic left-associativity" {
+    var fx = try parseFixture("1 - 2 - 3");
+    defer fx.arena.deinit();
+    // Expected AST: sub(sub(1, 2), 3)
+    const top = fx.expr.binop;
+    try testing.expectEqual(Ast.BinOpKind.sub, top.op);
+    try testing.expectEqual(@as(f64, 3), top.right.number);
+    const inner = top.left.binop;
+    try testing.expectEqual(Ast.BinOpKind.sub, inner.op);
+    try testing.expectEqual(@as(f64, 1), inner.left.number);
+    try testing.expectEqual(@as(f64, 2), inner.right.number);
+}
+
+test "XPath.Parser: div and mod are operator-position keywords" {
+    var fx = try parseFixture("7 div 2");
+    defer fx.arena.deinit();
+    try testing.expectEqual(Ast.BinOpKind.div, fx.expr.binop.op);
+
+    var fx2 = try parseFixture("7 mod 2");
+    defer fx2.arena.deinit();
+    try testing.expectEqual(Ast.BinOpKind.mod, fx2.expr.binop.op);
+}
+
+test "XPath.Parser: comparison operators" {
+    inline for (.{
+        .{ "1 = 2", Ast.BinOpKind.eq },
+        .{ "1 != 2", Ast.BinOpKind.neq },
+        .{ "1 < 2", Ast.BinOpKind.lt },
+        .{ "1 <= 2", Ast.BinOpKind.lte },
+        .{ "1 > 2", Ast.BinOpKind.gt },
+        .{ "1 >= 2", Ast.BinOpKind.gte },
+    }) |case| {
+        var fx = try parseFixture(case[0]);
+        defer fx.arena.deinit();
+        try testing.expectEqual(case[1], fx.expr.binop.op);
+    }
+}
+
+test "XPath.Parser: logical or/and short-circuit chain" {
+    var fx = try parseFixture("a or b and c");
+    defer fx.arena.deinit();
+    // Expected AST: or(path(a), and(path(b), path(c))) — and binds tighter
+    const top = fx.expr.binop;
+    try testing.expectEqual(Ast.BinOpKind.or_, top.op);
+    try testing.expectEqual(Ast.BinOpKind.and_, top.right.binop.op);
+}
+
+test "XPath.Parser: unary minus" {
+    var fx = try parseFixture("-1");
+    defer fx.arena.deinit();
+    try testing.expectEqual(@as(f64, 1), fx.expr.neg.number);
+}
+
+test "XPath.Parser: unary minus chain keeps only its parity" {
+    // Odd count → a single neg.
+    var fx = try parseFixture("- - - 1");
+    defer fx.arena.deinit();
+    try testing.expectEqual(@as(f64, 1), fx.expr.neg.number);
+
+    // Even count → two negs (still coerces the operand to a number).
+    var fx2 = try parseFixture("- - - - 1");
+    defer fx2.arena.deinit();
+    try testing.expectEqual(@as(f64, 1), fx2.expr.neg.neg.number);
+}
+
+test "XPath.Parser: union" {
+    var fx = try parseFixture("a | b");
+    defer fx.arena.deinit();
+    try testing.expectEqual(Ast.BinOpKind.union_, fx.expr.binop.op);
+}
+
+test "XPath.Parser: absolute path / alone is document root" {
+    var fx = try parseFixture("/");
+    defer fx.arena.deinit();
+    const path = fx.expr.path;
+    try testing.expect(path.absolute);
+    try testing.expectEqual(@as(usize, 0), path.steps.len);
+}
+
+test "XPath.Parser: absolute path /foo" {
+    var fx = try parseFixture("/foo");
+    defer fx.arena.deinit();
+    const path = fx.expr.path;
+    try testing.expect(path.absolute);
+    try testing.expectEqual(@as(usize, 1), path.steps.len);
+    try testing.expectEqualStrings("foo", path.steps[0].node_test.name);
+}
+
+test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" {
+    var fx = try parseFixture("//foo");
+    defer fx.arena.deinit();
+    const path = fx.expr.path;
+    try testing.expect(path.absolute);
+    try testing.expectEqual(@as(usize, 2), path.steps.len);
+    try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis);
+    try testing.expectEqual(Ast.TypeTest.node, path.steps[0].node_test.type_test);
+    try testing.expectEqualStrings("foo", path.steps[1].node_test.name);
+}
+
+test "XPath.Parser: relative path child::foo/bar" {
+    var fx = try parseFixture("foo/bar");
+    defer fx.arena.deinit();
+    const path = fx.expr.path;
+    try testing.expect(!path.absolute);
+    try testing.expectEqual(@as(usize, 2), path.steps.len);
+    try testing.expectEqual(Ast.Axis.child, path.steps[0].axis);
+    try testing.expectEqualStrings("foo", path.steps[0].node_test.name);
+    try testing.expectEqualStrings("bar", path.steps[1].node_test.name);
+}
+
+test "XPath.Parser: abbreviated steps . and .." {
+    var fx = try parseFixture("./..");
+    defer fx.arena.deinit();
+    const path = fx.expr.path;
+    try testing.expectEqual(@as(usize, 2), path.steps.len);
+    try testing.expectEqual(Ast.Axis.self, path.steps[0].axis);
+    try testing.expectEqual(Ast.Axis.parent, path.steps[1].axis);
+}
+
+test "XPath.Parser: attribute axis @class" {
+    var fx = try parseFixture("@class");
+    defer fx.arena.deinit();
+    const step = fx.expr.path.steps[0];
+    try testing.expectEqual(Ast.Axis.attribute, step.axis);
+    try testing.expectEqualStrings("class", step.node_test.name);
+}
+
+test "XPath.Parser: all 13 named axes parse correctly" {
+    inline for (.{
+        .{ "child::a", Ast.Axis.child },
+        .{ "descendant::a", Ast.Axis.descendant },
+        .{ "descendant-or-self::a", Ast.Axis.descendant_or_self },
+        .{ "self::a", Ast.Axis.self },
+        .{ "parent::a", Ast.Axis.parent },
+        .{ "ancestor::a", Ast.Axis.ancestor },
+        .{ "ancestor-or-self::a", Ast.Axis.ancestor_or_self },
+        .{ "following-sibling::a", Ast.Axis.following_sibling },
+        .{ "preceding-sibling::a", Ast.Axis.preceding_sibling },
+        .{ "following::a", Ast.Axis.following },
+        .{ "preceding::a", Ast.Axis.preceding },
+        // Spelled-out form of `@a`.
+        .{ "attribute::a", Ast.Axis.attribute },
+        .{ "namespace::a", Ast.Axis.namespace },
+    }) |case| {
+        var fx = try parseFixture(case[0]);
+        defer fx.arena.deinit();
+        try testing.expectEqual(case[1], fx.expr.path.steps[0].axis);
+    }
+}
+
+test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" {
+    var fx = try parseFixture("wibble::a");
+    defer fx.arena.deinit();
+    try testing.expectEqual(Ast.Axis.unknown, fx.expr.path.steps[0].axis);
+}
+
+test "XPath.Parser: wildcard *" {
+    var fx = try parseFixture("*");
+    defer fx.arena.deinit();
+    try testing.expectEqualStrings("*", fx.expr.path.steps[0].node_test.name);
+}
+
+test "XPath.Parser: namespace-prefixed name and wildcard" {
+    var fx = try parseFixture("svg:rect");
+    defer fx.arena.deinit();
+    try testing.expectEqualStrings("svg:rect", fx.expr.path.steps[0].node_test.name);
+
+    var fx2 = try parseFixture("svg:*");
+    defer fx2.arena.deinit();
+    try testing.expectEqualStrings("svg:*", fx2.expr.path.steps[0].node_test.name);
+}
+
+test "XPath.Parser: node-type tests" {
+    inline for (.{
+        .{ "node()", Ast.TypeTest.node },
+        .{ "text()", Ast.TypeTest.text },
+        .{ "comment()", Ast.TypeTest.comment },
+        .{ "processing-instruction()", Ast.TypeTest.processing_instruction },
+    }) |case| {
+        var fx = try parseFixture(case[0]);
+        defer fx.arena.deinit();
+        try testing.expectEqual(case[1], fx.expr.path.steps[0].node_test.type_test);
+    }
+}
+
+test "XPath.Parser: processing-instruction with literal target — consumed but ignored" {
+    var fx = try parseFixture("processing-instruction('xml-stylesheet')");
+    defer fx.arena.deinit();
+    try testing.expectEqual(Ast.TypeTest.processing_instruction, fx.expr.path.steps[0].node_test.type_test);
+}
+
+test "XPath.Parser: predicate on step" {
+    var fx = try parseFixture("p[1]");
+    defer fx.arena.deinit();
+    const step = fx.expr.path.steps[0];
+    try testing.expectEqual(@as(usize, 1), step.predicates.len);
+    try testing.expectEqual(@as(f64, 1), step.predicates[0].number);
+}
+
+test "XPath.Parser: multi-predicate step" {
+    var fx = try parseFixture("p[1][@x]");
+    defer fx.arena.deinit();
+    const step = fx.expr.path.steps[0];
+    try testing.expectEqual(@as(usize, 2), step.predicates.len);
+}
+
+test "XPath.Parser: filter expression with predicate parses as Filter, not Step" {
+    var fx = try parseFixture("(//a)[1]");
+    defer fx.arena.deinit();
+    // Top level is Filter wrapping a parenthesized path with one predicate.
+    const filt = fx.expr.filter;
+    try testing.expectEqual(@as(f64, 1), filt.predicate.number);
+    try testing.expect(filt.expr.path.absolute);
+}
+
+test "XPath.Parser: filter with multi-predicate nests" {
+    var fx = try parseFixture("(//a)[1][2]");
+    defer fx.arena.deinit();
+    const outer = fx.expr.filter;
+    try testing.expectEqual(@as(f64, 2), outer.predicate.number);
+    const inner = outer.expr.filter;
+    try testing.expectEqual(@as(f64, 1), inner.predicate.number);
+}
+
+test "XPath.Parser: filter with location-path tail (filter_path)" {
+    var fx = try parseFixture("(//a)/b");
+    defer fx.arena.deinit();
+    const fp = fx.expr.filter_path;
+    try testing.expect(fp.filter.path.absolute);
+    try testing.expectEqual(@as(usize, 1), fp.steps.len);
+    try testing.expectEqualStrings("b", fp.steps[0].node_test.name);
+}
+
+test "XPath.Parser: filter with // tail prepends descendant-or-self" {
+    var fx = try parseFixture("(//a)//b");
+    defer fx.arena.deinit();
+    const fp = fx.expr.filter_path;
+    try testing.expectEqual(@as(usize, 2), fp.steps.len);
+    try testing.expectEqual(Ast.Axis.descendant_or_self, fp.steps[0].axis);
+    try testing.expectEqualStrings("b", fp.steps[1].node_test.name);
+}
+
+test "XPath.Parser: function call followed by predicate" {
+    var fx = try parseFixture("id('x')[1]");
+    defer fx.arena.deinit();
+    const filt = fx.expr.filter;
+    try testing.expectEqual(@as(f64, 1), filt.predicate.number);
+    try testing.expectEqualStrings("id", filt.expr.fn_call.name);
+}
+
+test "XPath.Parser: complex representative expression" {
+    var fx = try parseFixture("//div[@class='active']/p[position()<=last()-1]");
+    defer fx.arena.deinit();
+    const path = fx.expr.path;
+    try testing.expect(path.absolute);
+    try testing.expectEqual(@as(usize, 3), path.steps.len);
+    try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis);
+    try testing.expectEqualStrings("div", path.steps[1].node_test.name);
+    try testing.expectEqual(@as(usize, 1), path.steps[1].predicates.len);
+
try testing.expectEqualStrings("p", path.steps[2].node_test.name); + try testing.expectEqual(@as(usize, 1), path.steps[2].predicates.len); +} + +fn expectParseError(input: []const u8, expected: anyerror) !void { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectError(expected, parse(arena.allocator(), input)); +} + +test "XPath.Parser: error on unbalanced paren" { + try expectParseError("(1", error.UnexpectedToken); +} + +test "XPath.Parser: error on unbalanced bracket" { + try expectParseError("p[1", error.UnexpectedToken); +} + +test "XPath.Parser: error on missing node test" { + try expectParseError("child::", error.ExpectedNodeTest); +} + +test "XPath.Parser: bare `+` falls through to step and reports missing node test" { + // Matches polyfill: + isn't a path/primary start, so the parser + // ends up in parseStep with no name to use as node test. + try expectParseError("+", error.ExpectedNodeTest); +} + +test "XPath.Parser: error on trailing tokens" { + try expectParseError("1 2", error.UnexpectedToken); +} + +test "XPath.Parser: empty string falls through to step and reports missing node test" { + try expectParseError("", error.ExpectedNodeTest); +} + +test "XPath.Parser: 91-case gem battery — every expression parses" { + // Source: capybara-lightpanda spec/features/driver_spec.rb, + // describe "XPath polyfill — XPath 1.0 conformance" battery. + // Phase 2 acceptance criterion (references/phases.md). 
+ const battery = [_][]const u8{ + "/html", + "/html/body", + "/", + "//h1", + "//ul/li", + "//ul//li", + ".", + ".//li", + "//section/*", + "//*[@id='heading']", + "//li[1]/following-sibling::li", + "//li[5]/preceding-sibling::li", + "//li/parent::ul", + "//li/ancestor::body", + "//li/ancestor-or-self::body", + "//li[3]/preceding::li", + "//li[1]/following::li", + "//ul/descendant::li", + "//ul/descendant-or-self::li", + "//section[1]/child::span", + "//*[@id='heading']/self::h1", + "//a[1]/attribute::href", + "//a[1]/@*", + "//li[1]", + "//li[last()]", + "//li[last() - 1]", + "//li[position() = 1]", + "//li[position() > 2]", + "//li[position() mod 2 = 1]", + "(//li)[1]", + "(//section)[2]", + "//li[3]/preceding-sibling::li[1]", + "//li[5]/ancestor::*[1]", + "//li[contains(concat(' ', @class, ' '), ' even ')][2]", + "//*[@id='heading' and @class='primary']", + "//*[@id='heading' or @id='p1']", + "//section[a]", + "//section[count(span) = 2]", + "//ul[count(li) = 5]", + "//tr[td[1]]", + "//tr[td/text() = 'Bob']", + "//*[starts-with(@id, 'link')]", + "//*[normalize-space() = 'Hello World']", + "//*[normalize-space(.) 
= 'Item 1']", + "//*[concat(@id, '-x') = 'heading-x']", + "//*[substring(@id, 1, 1) = 'p']", + "//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]", + "//p[translate(@id, 'p', 'q') = 'q1']", + "//*[substring-before(@id, '1') = 'p']", + "//*[substring-after(@id, 'lin') = 'k1']", + "//tr[number(td[2]) > 28]", + "//tr[floor(number(td[2]) div 10) = 3]", + "//tr[ceiling(number(td[2]) div 10) = 3]", + "//tr[round(number(td[2]) div 10) = 3]", + "//ul[sum(li/@data-len) = 0]", + "//p[boolean(@lang)]", + "//*[false()]", + "//*[name() = 'h1']", + "//*[local-name() = 'h1']", + "id('heading')", + "id('heading p1')", + "id(//em/parent::p/@id)", + "//h1 | //title", + "//h1 | //*[@id='p1']", + "//*[@id='heading'] | //*[@id='heading']", + "//li[position() + 1 = 3]", + "//li[position() - 1 = 0]", + "//li[position() * 2 = 4]", + "//li[position() div 2 = 1]", + "//li[(position() mod 2) = 0]", + "//tr[number(td[2]) = 30]", + "//tr[number(td[2]) != 30]", + "//tr[number(td[2]) < 30]", + "//tr[number(td[2]) <= 30]", + "//tr[number(td[2]) > 30]", + "//tr[number(td[2]) >= 30]", + "//tr[td[2] = 30]", + "//tr[td[2] = '30']", + "//comment()", + ".//a[contains(normalize-space(string(.)), 'Click me')]", + ".//input[(./@type = 'text')]", + ".//*[@id='heading']", + ".//li[contains(concat(' ', @class, ' '), ' even ')]", + "//*[@id='heading']/text()", + "//em/parent::p", + "//p[em]", + "//p[not(em)]", + "//section[a/@href = '/foo']", + "//ul/li[last()][position() = last()]", + "//ul[string(count(li)) = '5']", + "//body[count(//*[contains(@class, 'item')]) = 5]", + }; + try testing.expectEqual(@as(usize, 91), battery.len); + + for (battery) |expr| { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + _ = parse(arena.allocator(), expr) catch |err| { + std.debug.print("\n failed to parse: {s}\n error: {s}\n", .{ expr, @errorName(err) }); + return err; + }; + } +} diff --git a/src/browser/xpath/Result.zig b/src/browser/xpath/Result.zig new file mode 100644 
index 00000000..c0822054 --- /dev/null +++ b/src/browser/xpath/Result.zig @@ -0,0 +1,200 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 runtime values. +//! +//! Mirrors the polyfill's untagged JS values (lib/capybara/lightpanda/ +//! javascripts/index.js, the `evaluate()` return convention): a node-set +//! is a JS array of nodes, and the three scalar types are JS primitives. +//! In Zig we tag the union explicitly. Type coercion (`toString`, +//! `toNumber`, `toBoolean`) follows XPath 1.0 spec §3, with HTML-pragmatic +//! shortcuts inherited from the polyfill (decision #2). + +const std = @import("std"); +const Allocator = std.mem.Allocator; + +const Node = @import("../webapi/Node.zig"); +const CData = Node.CData; + +pub const Result = union(enum) { + /// Owned by the evaluator's arena. Order is significant only at the + /// public boundary, where the evaluator sorts to document order. + node_set: []const *Node, + number: f64, + string: []const u8, + boolean: bool, +}; + +/// XPath spec §5: string-value of a node. 
///
/// - Element / Document: concatenated text descendants (excluding
///   comments and processing-instructions; matches `Node.getTextContent`)
/// - Attribute: attribute value
/// - Text / Comment / CDATA / PI: the node's data
/// - DocumentType / DocumentFragment: empty (matches polyfill's
///   `nodeValue || textContent || ''` fallthrough)
///
/// The returned slice is borrowed from the node for cdata/attribute
/// (cheap, no allocation) and arena-allocated for element/document
/// (concatenation buffer).
pub fn stringValueOf(arena: Allocator, node: *Node) error{WriteFailed}![]const u8 {
    return switch (node._type) {
        .attribute => |attr| attr._value.str(),
        .cdata => |cd| cd._data.str(),
        .element, .document => blk: {
            var buf = std.Io.Writer.Allocating.init(arena);
            try node.getTextContent(&buf.writer);
            break :blk buf.written();
        },
        .document_type, .document_fragment => "",
    };
}

/// Boolean coercion: a number is true unless it is zero or NaN; a
/// string or node-set is true when it is non-empty.
pub fn toBoolean(val: Result) bool {
    return switch (val) {
        .boolean => |b| b,
        // NaN compares unequal to 0, so it needs its own exclusion.
        .number => |n| n != 0 and !std.math.isNan(n),
        .string => |s| s.len > 0,
        .node_set => |ns| ns.len > 0,
    };
}

/// Numeric coercion. Booleans map to 1/0, strings go through
/// `stringToNumber`, and a node-set is converted via the string-value
/// of its first entry (`ns[0]`); an empty node-set is NaN. Empty /
/// whitespace-only strings produce NaN (XPath spec §4.4).
///
/// `arena` is only used when a node-set's string-value has to be
/// materialized by `stringValueOf`.
pub fn toNumber(arena: Allocator, val: Result) error{WriteFailed}!f64 {
    return switch (val) {
        .number => |n| n,
        .boolean => |b| if (b) 1 else 0,
        .string => |s| stringToNumber(s),
        .node_set => |ns| blk: {
            if (ns.len == 0) break :blk std.math.nan(f64);
            const sv = try stringValueOf(arena, ns[0]);
            break :blk stringToNumber(sv);
        },
    };
}

/// Converts a string to a number following the XPath 1.0 `number()`
/// rules (spec §4.4): optional whitespace, an optional `-`, then
/// `Digits ('.' Digits?)?` or `'.' Digits`, then optional whitespace.
/// Every other string yields NaN.
///
/// The shape is validated before delegating to `std.fmt.parseFloat`
/// because that parser is more permissive than XPath: it also accepts a
/// leading `+`, exponent notation, hex floats and `inf` / `nan`
/// spellings, none of which are XPath numbers.
pub fn stringToNumber(s: []const u8) f64 {
    const nan = std.math.nan(f64);

    // XPath whitespace is the XML `S` production: space, tab, CR, LF.
    const trimmed = std.mem.trim(u8, s, " \t\r\n");
    const magnitude = if (std.mem.startsWith(u8, trimmed, "-")) trimmed[1..] else trimmed;

    var digit_count: usize = 0;
    var seen_dot = false;
    for (magnitude) |c| {
        switch (c) {
            '0'...'9' => digit_count += 1,
            '.' => {
                if (seen_dot) return nan;
                seen_dot = true;
            },
            else => return nan,
        }
    }
    // Covers "", "-", "." and "-.": a Number needs at least one digit.
    if (digit_count == 0) return nan;

    // `trimmed` is now `-?` followed by digits with at most one dot.
    return std.fmt.parseFloat(f64, trimmed) catch nan;
}

/// String coercion.
Allocates only for `.number` (formatting) and for +/// `.node_set` whose first node is an Element/Document (text content +/// concatenation). Boolean → static string. String → borrowed. +pub fn toString(arena: Allocator, val: Result) error{ OutOfMemory, WriteFailed }![]const u8 { + return switch (val) { + .string => |s| s, + .boolean => |b| if (b) "true" else "false", + .number => |n| try numberToString(arena, n), + .node_set => |ns| if (ns.len == 0) "" else try stringValueOf(arena, ns[0]), + }; +} + +/// XPath spec §4.2: NaN, ±0, and ±Infinity have specific spellings; +/// integer-valued numbers print without trailing `.0`. Diverges from +/// Zig's default `{d}` which prints `nan`/`inf` and may emit `-0`. +pub fn numberToString(arena: Allocator, n: f64) error{OutOfMemory}![]const u8 { + if (std.math.isNan(n)) return "NaN"; + if (std.math.isPositiveInf(n)) return "Infinity"; + if (std.math.isNegativeInf(n)) return "-Infinity"; + if (n == 0) return "0"; // covers +0 and -0 + if (@trunc(n) == n and n >= -9.007199254740992e15 and n <= 9.007199254740992e15) { + return std.fmt.allocPrint(arena, "{d}", .{@as(i64, @intFromFloat(n))}); + } + return std.fmt.allocPrint(arena, "{d}", .{n}); +} + +const testing = std.testing; + +test "Result: toBoolean" { + try testing.expect(toBoolean(.{ .boolean = true })); + try testing.expect(!toBoolean(.{ .boolean = false })); + try testing.expect(toBoolean(.{ .number = 1 })); + try testing.expect(!toBoolean(.{ .number = 0 })); + try testing.expect(!toBoolean(.{ .number = std.math.nan(f64) })); + try testing.expect(toBoolean(.{ .string = "x" })); + try testing.expect(!toBoolean(.{ .string = "" })); + try testing.expect(!toBoolean(.{ .node_set = &.{} })); +} + +test "Result: stringToNumber" { + try testing.expectEqual(@as(f64, 42), stringToNumber("42")); + try testing.expectEqual(@as(f64, 3.14), stringToNumber("3.14")); + try testing.expectEqual(@as(f64, -1), stringToNumber("-1")); + try testing.expectEqual(@as(f64, 5), stringToNumber(" 5 
")); + try testing.expect(std.math.isNan(stringToNumber(""))); + try testing.expect(std.math.isNan(stringToNumber(" "))); + try testing.expect(std.math.isNan(stringToNumber("abc"))); +} + +test "Result: numberToString — integers print without decimal" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + try testing.expectEqualStrings("5", try numberToString(a, 5)); + try testing.expectEqualStrings("0", try numberToString(a, 0)); + try testing.expectEqualStrings("0", try numberToString(a, -0.0)); + try testing.expectEqualStrings("-1", try numberToString(a, -1)); + try testing.expectEqualStrings("42", try numberToString(a, 42.0)); +} + +test "Result: numberToString — special values" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + try testing.expectEqualStrings("NaN", try numberToString(a, std.math.nan(f64))); + try testing.expectEqualStrings("Infinity", try numberToString(a, std.math.inf(f64))); + try testing.expectEqualStrings("-Infinity", try numberToString(a, -std.math.inf(f64))); +} + +test "Result: numberToString — floats" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + const a = arena.allocator(); + try testing.expectEqualStrings("3.14", try numberToString(a, 3.14)); + try testing.expectEqualStrings("0.5", try numberToString(a, 0.5)); +} + +test "Result: toString — boolean returns static string" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectEqualStrings("true", try toString(arena.allocator(), .{ .boolean = true })); + try testing.expectEqualStrings("false", try toString(arena.allocator(), .{ .boolean = false })); +} + +test "Result: toString — node-set with empty arr is empty" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectEqualStrings("", try 
toString(arena.allocator(), .{ .node_set = &.{} })); +} + +test "Result: toNumber — empty node-set is NaN" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expect(std.math.isNan(try toNumber(arena.allocator(), .{ .node_set = &.{} }))); +} + +test "Result: toNumber — boolean coerces to 0/1" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + try testing.expectEqual(@as(f64, 1), try toNumber(arena.allocator(), .{ .boolean = true })); + try testing.expectEqual(@as(f64, 0), try toNumber(arena.allocator(), .{ .boolean = false })); +} diff --git a/src/browser/xpath/Tokenizer.zig b/src/browser/xpath/Tokenizer.zig new file mode 100644 index 00000000..7b3a7a27 --- /dev/null +++ b/src/browser/xpath/Tokenizer.zig @@ -0,0 +1,466 @@ +// Copyright (C) 2023-2026 Lightpanda (Selecy SAS) +// +// Francis Bouvier +// Pierre Tachoire +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +//! XPath 1.0 expression tokenizer. +//! +//! Mirrors the polyfill `tokenize()` in capybara-lightpanda +//! (lib/capybara/lightpanda/javascripts/index.js) to preserve its +//! HTML-pragmatic behavior: lenient whitespace, case-preserving names, +//! no escape processing in string literals (use the other quote type +//! to embed), unknown characters silently skipped. +//! +//! 
//! The tokenizer borrows from the input slice and never allocates.
//! `next()` always returns a token; `.eof` is terminal and idempotent.

const std = @import("std");

const Tokenizer = @This();

/// One lexical token of an XPath expression.
///
/// The `string` and `name` payloads are sub-slices of the tokenizer's
/// `input` (nothing is copied), so they stay valid only as long as the
/// buffer handed to the tokenizer does.
pub const Token = union(enum) {
    /// String literal: `'foo'` or `"foo"`. Quotes are stripped; escapes
    /// are not interpreted (the polyfill takes the raw substring).
    string: []const u8,

    /// Numeric literal: `123`, `1.5`, `.5`, `5.`. f64 matches the
    /// runtime number type.
    number: f64,

    /// Bare identifier — element/function/axis name, an `or`/`and`/
    /// `div`/`mod` keyword, or a namespace-prefixed name (`prefix:local`,
    /// `prefix:*`). The colon and optional wildcard are preserved
    /// verbatim so the parser can split.
    name: []const u8,

    // Punctuation and operators. The comment on each variant shows the
    // exact source spelling it stands for.
    slash, // `/`
    double_slash, // `//`
    dot, // `.`
    double_dot, // `..`
    at, // `@`
    lparen, // `(`
    rparen, // `)`
    lbracket, // `[`
    rbracket, // `]`
    comma, // `,`
    pipe, // `|`
    eq, // `=`
    neq, // `!=`
    lt, // `<`
    lte, // `<=`
    gt, // `>`
    gte, // `>=`
    plus, // `+`
    minus, // `-`
    star, // `*`
    dollar, // `$`
    double_colon, // `::`
    // End of input. Terminal: once reached, it is what `next()` keeps
    // returning.
    eof,
};

/// Expression text being scanned. Borrowed, never modified.
input: []const u8,
/// Byte offset into `input` of the next unread byte.
position: usize = 0,

/// True once `position` has reached (or passed) the end of `input`.
fn isEof(self: *const Tokenizer) bool {
    return self.position >= self.input.len;
}

// True iff the input has at least `n` bytes left after the current one
// — i.e. `byteAt(n)` will not read past the end.
+fn hasAtLeast(self: *const Tokenizer, n: usize) bool { + return self.position + n < self.input.len; +} + +fn byteAt(self: *const Tokenizer, offset: usize) u8 { + return self.input[self.position + offset]; +} + +fn skipWhitespace(self: *Tokenizer) void { + while (!self.isEof()) { + switch (self.input[self.position]) { + ' ', '\t', '\n', '\r' => self.position += 1, + else => return, + } + } +} + +fn isNameStart(c: u8) bool { + return (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_'; +} + +fn isNameContinue(c: u8) bool { + return isNameStart(c) or std.ascii.isDigit(c) or c == '-' or c == '.'; +} + +fn consumeString(self: *Tokenizer, quote: u8) Token { + self.position += 1; // opening quote + const start = self.position; + while (!self.isEof() and self.input[self.position] != quote) { + self.position += 1; + } + const value = self.input[start..self.position]; + // Closing quote skipped; at EOF we just emit what we have (polyfill parity). + if (!self.isEof()) self.position += 1; + return .{ .string = value }; +} + +fn consumeNumber(self: *Tokenizer) Token { + const start = self.position; + while (!self.isEof() and std.ascii.isDigit(self.input[self.position])) { + self.position += 1; + } + if (!self.isEof() and self.input[self.position] == '.') { + self.position += 1; + while (!self.isEof() and std.ascii.isDigit(self.input[self.position])) { + self.position += 1; + } + } + // Caller only enters consumeNumber on a digit or `.digit`, so the + // slice is always `\d+(\.\d*)?` or `\.\d+` — both accepted by + // parseFloat (verified against Zig 0.15.2). + const value = std.fmt.parseFloat(f64, self.input[start..self.position]) catch unreachable; + return .{ .number = value }; +} + +fn consumeName(self: *Tokenizer) Token { + const start = self.position; + while (!self.isEof() and isNameContinue(self.input[self.position])) { + self.position += 1; + } + + // Optional namespace prefix: `prefix:local` or `prefix:*`. 
A `::` + // is the axis separator and belongs to the next token, so peek + // for a single `:` not followed by another `:`. + if (!self.isEof() and self.input[self.position] == ':' and + (self.position + 1 >= self.input.len or self.input[self.position + 1] != ':')) + { + self.position += 1; // `:` + if (!self.isEof() and self.input[self.position] == '*') { + self.position += 1; + } else { + while (!self.isEof() and isNameContinue(self.input[self.position])) { + self.position += 1; + } + } + } + + return .{ .name = self.input[start..self.position] }; +} + +pub fn next(self: *Tokenizer) Token { + while (true) { + self.skipWhitespace(); + if (self.isEof()) return .eof; + + const c = self.byteAt(0); + + if (c == '"' or c == '\'') { + return self.consumeString(c); + } + + if (std.ascii.isDigit(c) or (c == '.' and self.hasAtLeast(1) and std.ascii.isDigit(self.byteAt(1)))) { + return self.consumeNumber(); + } + + if (self.hasAtLeast(1)) { + const c2 = self.byteAt(1); + switch (c) { + '/' => if (c2 == '/') { + self.position += 2; + return .double_slash; + }, + ':' => if (c2 == ':') { + self.position += 2; + return .double_colon; + }, + '!' => if (c2 == '=') { + self.position += 2; + return .neq; + }, + '<' => if (c2 == '=') { + self.position += 2; + return .lte; + }, + '>' => if (c2 == '=') { + self.position += 2; + return .gte; + }, + '.' => if (c2 == '.') { + self.position += 2; + return .double_dot; + }, + else => {}, + } + } + + const single: ?Token = switch (c) { + '(' => .lparen, + ')' => .rparen, + '[' => .lbracket, + ']' => .rbracket, + ',' => .comma, + '|' => .pipe, + '=' => .eq, + '<' => .lt, + '>' => .gt, + '+' => .plus, + '-' => .minus, + '*' => .star, + '$' => .dollar, + '/' => .slash, + '@' => .at, + '.' => .dot, + else => null, + }; + if (single) |tok| { + self.position += 1; + return tok; + } + + if (isNameStart(c)) { + return self.consumeName(); + } + + // Polyfill parity (decision #2): unknown characters are + // silently skipped, never an error. 
+ self.position += 1; + } +} + +const testing = std.testing; + +fn expectTokens(input: []const u8, expected: []const Token) !void { + var tokenizer = Tokenizer{ .input = input }; + for (expected) |exp| { + const got = tokenizer.next(); + try testing.expectEqualDeep(exp, got); + } +} + +test "XPath.Tokenizer: empty input emits EOF" { + try expectTokens("", &.{.eof}); +} + +test "XPath.Tokenizer: only whitespace emits EOF" { + try expectTokens(" \t\n\r ", &.{.eof}); +} + +test "XPath.Tokenizer: EOF idempotent past end" { + var t = Tokenizer{ .input = "" }; + try testing.expectEqual(Token.eof, t.next()); + try testing.expectEqual(Token.eof, t.next()); + try testing.expectEqual(Token.eof, t.next()); +} + +test "XPath.Tokenizer: single-char operators" { + try expectTokens("()[],|=<>+-*$/@.", &.{ + .lparen, .rparen, .lbracket, .rbracket, .comma, .pipe, + .eq, .lt, .gt, .plus, .minus, .star, + .dollar, .slash, .at, .dot, .eof, + }); +} + +test "XPath.Tokenizer: two-char operators" { + try expectTokens("// :: != <= >= ..", &.{ + .double_slash, .double_colon, .neq, .lte, .gte, .double_dot, .eof, + }); +} + +test "XPath.Tokenizer: two-char vs single-char disambiguation" { + try expectTokens("/a/b", &.{ + .slash, .{ .name = "a" }, .slash, .{ .name = "b" }, .eof, + }); + try expectTokens("//a", &.{ .double_slash, .{ .name = "a" }, .eof }); + try expectTokens("a 1 and q[1] == '/') return true; + if (q[0] == '(' and q.len > 1) { + if (q[1] == '/') return true; + if (q[1] == '.' 
and q.len > 2 and q[2] == '/') return true; + } + return std.mem.indexOf(u8, q, "::") != null; +} + // https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch fn performSearch(cmd: *CDP.Command) !void { const params = (try cmd.params(struct { @@ -100,15 +116,23 @@ fn performSearch(cmd: *CDP.Command) !void { const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded; const frame = bc.session.currentFrame() orelse return error.FrameNotLoaded; - const list = try Selector.querySelectorAll(frame.window._document.asNode(), params.query, frame); + const root = frame.window._document.asNode(); + + if (isXPathQuery(params.query)) { + const arena = try frame.getArena(.medium, "DOM.performSearch"); + defer frame.releaseArena(arena); + const nodes = try xpath.searchAll(arena, frame, root, params.query); + return finishSearch(cmd, bc, nodes); + } + + const list = try Selector.querySelectorAll(root, params.query, frame); defer list.deinit(frame._page); + return finishSearch(cmd, bc, list._nodes); +} - const search = try bc.node_search_list.create(list._nodes); - - // dispatch setChildNodesEvents to inform the client of the subpart of node - // tree covering the results. 
- try dispatchSetChildNodes(cmd, list._nodes); - +fn finishSearch(cmd: *CDP.Command, bc: *CDP.BrowserContext, nodes: []const *DOMNode) !void { + const search = try bc.node_search_list.create(nodes); + try dispatchSetChildNodes(cmd, nodes); return cmd.sendResult(.{ .searchId = search.name, .resultCount = @as(u32, @intCast(search.node_ids.len)), @@ -616,6 +640,70 @@ test "cdp.dom: search flow" { try ctx.expectSentError(-31998, "SearchResultNotFound", .{ .id = 17 }); } +test "cdp.dom: performSearch with XPath" { + var ctx = try testing.context(); + defer ctx.deinit(); + + _ = try ctx.loadBrowserContext(.{ .id = "BID-A", .url = "cdp/perform_search_xpath.html" }); + + try ctx.processMessage(.{ + .id = 20, + .method = "DOM.performSearch", + .params = .{ .query = "//p" }, + }); + try ctx.expectSentResult(.{ .searchId = "0", .resultCount = 3 }, .{ .id = 20 }); + + try ctx.processMessage(.{ + .id = 21, + .method = "DOM.performSearch", + .params = .{ .query = "descendant::p" }, + }); + try ctx.expectSentResult(.{ .searchId = "1", .resultCount = 3 }, .{ .id = 21 }); + + try ctx.processMessage(.{ + .id = 22, + .method = "DOM.performSearch", + .params = .{ .query = "//*[@id='outer']" }, + }); + try ctx.expectSentResult(.{ .searchId = "2", .resultCount = 1 }, .{ .id = 22 }); + + try ctx.processMessage(.{ + .id = 23, + .method = "DOM.performSearch", + .params = .{ .query = "p" }, + }); + try ctx.expectSentResult(.{ .searchId = "3", .resultCount = 3 }, .{ .id = 23 }); + + try ctx.processMessage(.{ + .id = 24, + .method = "DOM.performSearch", + .params = .{ .query = "div p" }, + }); + try ctx.expectSentResult(.{ .searchId = "4", .resultCount = 2 }, .{ .id = 24 }); +} + +test "cdp.dom: isXPathQuery heuristic" { + // XPath-shaped queries — each line covers a distinct heuristic branch. 
+ try std.testing.expect(isXPathQuery("/html")); + try std.testing.expect(isXPathQuery("//p")); + try std.testing.expect(isXPathQuery(".//foo")); + try std.testing.expect(isXPathQuery("(//foo)[1]")); + try std.testing.expect(isXPathQuery("(./bar)[2]")); + try std.testing.expect(isXPathQuery("descendant::p")); + try std.testing.expect(isXPathQuery("ancestor-or-self::*")); + try std.testing.expect(isXPathQuery("//*[@id='x']")); + + // CSS-shaped queries — fall through to the existing path. + try std.testing.expect(!isXPathQuery("")); + try std.testing.expect(!isXPathQuery("p")); + try std.testing.expect(!isXPathQuery("div p")); + try std.testing.expect(!isXPathQuery("#main")); + try std.testing.expect(!isXPathQuery(".cls")); + try std.testing.expect(!isXPathQuery("[data-x]")); + try std.testing.expect(!isXPathQuery("(p)")); // parens without path → CSS + try std.testing.expect(!isXPathQuery(".x")); // leading dot without / +} + test "cdp.dom: querySelector unknown search id" { var ctx = try testing.context(); defer ctx.deinit(); From 0fcd47e1e112489317e868c4cc2d415a03547545 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:36:00 +0200 Subject: [PATCH 02/34] xpath: dupe expression into arena before parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Parser borrows string slices from its input for AST literals, names, and var refs. Without duping, the AST holds slices into the JS call_arena, which is reset when the top-level call returns — every subsequent evaluate() of a cached XPathExpression would dereference freed memory. 
--- src/browser/webapi/XPathExpression.zig | 7 ++++++- src/browser/webapi/XPathResult.zig | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig index 6dba00fb..8c771d2e 100644 --- a/src/browser/webapi/XPathExpression.zig +++ b/src/browser/webapi/XPathExpression.zig @@ -50,7 +50,12 @@ pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression { const arena = try frame.getArena(.tiny, "XPathExpression"); errdefer frame.releaseArena(arena); - const expr = try xpath.Parser.parse(arena, expression); + // The AST borrows string slices from its input (literals, names, + // var refs, function names). `expression` is materialized in the JS + // call_arena and is reclaimed when the top-level call returns, so + // dupe into our long-lived arena before parsing. + const owned = try arena.dupe(u8, expression); + const expr = try xpath.Parser.parse(arena, owned); const xe = try arena.create(XPathExpression); xe.* = .{ ._arena = arena, ._expr = expr }; return xe; diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 6bf5095b..1da520ec 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -95,7 +95,12 @@ pub fn fromExpression( const arena = try frame.getArena(.medium, "XPathResult"); errdefer frame.releaseArena(arena); - const expr = try xpath.Parser.parse(arena, expression); + // The AST borrows string slices from its input (literals, names, + // var refs, function names). `expression` is materialized in the JS + // call_arena and is reclaimed when the top-level call returns, so + // dupe into our long-lived arena before parsing. 
+ const owned = try arena.dupe(u8, expression); + const expr = try xpath.Parser.parse(arena, owned); const result = try xpath.Evaluator.evaluate(arena, frame, expr, context_node); return fromResult(arena, requested_type, result); } From 33714a4dfd8d4c47178f7f07a3850056be7c4058 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:36:13 +0200 Subject: [PATCH 03/34] cdp: tighten isXPathQuery '::' heuristic A bare indexOf("::") matched CSS pseudo-elements (a::before) and attribute values containing '::' ([data-x="x::y"]), misrouting them to the XPath evaluator. Require an axis-name shape ([a-zA-Z-]) immediately before '::' so only real axis specifiers like descendant::p are dispatched to XPath. --- src/cdp/domains/dom.zig | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig index 33283b2d..843cc20e 100644 --- a/src/cdp/domains/dom.zig +++ b/src/cdp/domains/dom.zig @@ -104,7 +104,16 @@ fn isXPathQuery(q: []const u8) bool { if (q[1] == '/') return true; if (q[1] == '.' and q.len > 2 and q[2] == '/') return true; } - return std.mem.indexOf(u8, q, "::") != null; + // Require axis-name shape immediately before `::` so CSS pseudo-elements + // (`a::before`) and attribute values containing `::` (`[data-x="x::y"]`) + // aren't misrouted to the XPath evaluator. 
+ var idx: usize = 0; + while (std.mem.indexOfPos(u8, q, idx, "::")) |hit| : (idx = hit + 1) { + if (hit == 0) continue; + const c = q[hit - 1]; + if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '-') return true; + } + return false; } // https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch From a4abbb6d13fcae99fed3ba2fabebf1253820234d Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:37:12 +0200 Subject: [PATCH 04/34] xpath: cache attribute axis nodes via frame lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The attribute axis was calling Entry.toAttribute on every visit, materializing fresh *Attribute structs (plus duped name/value strings) into page-lifetime storage. Repeated XPath queries — the Capybara/ Selenium polling pattern this PR targets — accumulated unbounded copies for the same DOM entries. Route through frame._attribute_lookup so each Entry resolves to a single cached *Attribute, matching List.getAttribute and NamedNodeMap.getAtIndex. --- src/browser/xpath/Evaluator.zig | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index a16d7b37..11f7a6c1 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -277,11 +277,15 @@ fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) E const el = node.is(Element) orelse return; var it = el.attributeIterator(); while (it.next()) |entry| { - // Materialize as full Attribute so the result is *Node-uniform. - // Allocates from frame.arena (long-lived); attribute axis is - // typically leaf, so churn is bounded. 
- const attr = try entry.toAttribute(el, self.frame); - try out.append(self.arena, attr._proto); + // Memoize via frame._attribute_lookup so repeated XPath queries + // (Capybara/Selenium polling) reuse the same *Attribute instead + // of leaking fresh ones into page-lifetime storage on every call. + // Same pattern as Attribute.List.getAttribute / NamedNodeMap.getAtIndex. + const gop = try self.frame._attribute_lookup.getOrPut(self.frame.arena, @intFromPtr(entry)); + if (!gop.found_existing) { + gop.value_ptr.* = try entry.toAttribute(el, self.frame); + } + try out.append(self.arena, gop.value_ptr.*._proto); } } From e7c3e77c414200c028dd450f27a2dcb1b9dc808f Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 29 Apr 2026 00:37:39 +0200 Subject: [PATCH 05/34] xpath: match CDATASection in text() node test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per XPath 1.0 §5.7, the data model has no CDATASection node — CDATA content is part of the text node value. The text() node test was only matching DOM nodeType 3 (Text), silently excluding CDATA sections (nodeType 4) parsed via DOMParser/XMLDocument and inline foreign content like SVG with embedded scripts. --- src/browser/xpath/Evaluator.zig | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 11f7a6c1..99202cbd 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -295,7 +295,10 @@ fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[] return switch (test_) { .type_test => |kind| switch (kind) { .node => true, - .text => node.getNodeType() == 3, + // XPath 1.0 §5.7: the data model has no CDATASection node — + // CDATA content is part of the text node value. Match both + // Text (3) and CDATASection (4) DOM node types. 
+ .text => node.getNodeType() == 3 or node.getNodeType() == 4, .comment => node.getNodeType() == 8, .processing_instruction => node.getNodeType() == 7, }, From 94bcee63222c682dc759003a2a8165e03e2f19e6 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 18:19:44 +0200 Subject: [PATCH 06/34] xpath: apply review style/convention feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename Result.zig / Ast.zig / Functions.zig to snake_case (no top-level fields per Zig style guide) - Restructure imports across xpath module: lib (std/lp) → relative (further → nearer) → aliases - Move `frame` to last parameter on Evaluator.evaluate, searchAll, Functions.call, idFn (matches js bridge convention); call sites updated in webapi/XPath{Result,Expression}.zig and cdp/domains/dom.zig - Local-pos style in XPathResult.iterateNext --- src/browser/webapi/XPathExpression.zig | 4 ++-- src/browser/webapi/XPathResult.zig | 11 +++++---- src/browser/xpath/Evaluator.zig | 24 ++++++++++--------- src/browser/xpath/Parser.zig | 5 ++-- src/browser/xpath/{Ast.zig => ast.zig} | 0 .../xpath/{Functions.zig => functions.zig} | 16 +++++++------ src/browser/xpath/{Result.zig => result.zig} | 3 ++- src/cdp/domains/dom.zig | 2 +- 8 files changed, 36 insertions(+), 29 deletions(-) rename src/browser/xpath/{Ast.zig => ast.zig} (100%) rename src/browser/xpath/{Functions.zig => functions.zig} (98%) rename src/browser/xpath/{Result.zig => result.zig} (99%) diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig index 8c771d2e..b24b6268 100644 --- a/src/browser/webapi/XPathExpression.zig +++ b/src/browser/webapi/XPathExpression.zig @@ -33,7 +33,7 @@ const Node = @import("Node.zig"); const XPathResult = @import("XPathResult.zig"); const xpath = struct { - const Ast = @import("../xpath/Ast.zig"); + const Ast = @import("../xpath/ast.zig"); const Parser = @import("../xpath/Parser.zig"); const Evaluator = 
@import("../xpath/Evaluator.zig"); }; @@ -76,7 +76,7 @@ pub fn evaluate( const arena = try frame.getArena(.medium, "XPathResult"); errdefer frame.releaseArena(arena); - const eval_result = try xpath.Evaluator.evaluate(arena, frame, self._expr, context_node); + const eval_result = try xpath.Evaluator.evaluate(arena, self._expr, context_node, frame); return XPathResult.fromResult(arena, requested_type, eval_result); } diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 1da520ec..44c29b44 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -46,7 +46,7 @@ const Node = @import("Node.zig"); const xpath = struct { const Parser = @import("../xpath/Parser.zig"); const Evaluator = @import("../xpath/Evaluator.zig"); - const Result = @import("../xpath/Result.zig"); + const Result = @import("../xpath/result.zig"); }; const Allocator = std.mem.Allocator; @@ -101,7 +101,7 @@ pub fn fromExpression( // dupe into our long-lived arena before parsing. 
const owned = try arena.dupe(u8, expression); const expr = try xpath.Parser.parse(arena, owned); - const result = try xpath.Evaluator.evaluate(arena, frame, expr, context_node); + const result = try xpath.Evaluator.evaluate(arena, expr, context_node, frame); return fromResult(arena, requested_type, result); } @@ -220,9 +220,10 @@ pub fn iterateNext(self: *XPathResult) !?*Node { if (self._type != UNORDERED_NODE_ITERATOR_TYPE and self._type != ORDERED_NODE_ITERATOR_TYPE) { return error.InvalidStateError; } - if (self._iter_pos >= self._value.nodes.len) return null; - const node = self._value.nodes[self._iter_pos]; - self._iter_pos += 1; + const pos = self._iter_pos; + if (pos >= self._value.nodes.len) return null; + const node = self._value.nodes[pos]; + self._iter_pos = pos + 1; return node; } diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 99202cbd..d654ed8f 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -30,17 +30,19 @@ //! reverse-axis positional predicates evaluate against proximity. const std = @import("std"); -const Allocator = std.mem.Allocator; const lp = @import("lightpanda"); -const Ast = @import("Ast.zig"); -const Parser = @import("Parser.zig"); -const Result = @import("Result.zig"); -const Functions = @import("Functions.zig"); const Node = @import("../webapi/Node.zig"); + +const Ast = @import("ast.zig"); +const Parser = @import("Parser.zig"); +const Result = @import("result.zig"); +const Functions = @import("functions.zig"); + +const Frame = lp.Frame; const Element = Node.Element; const Document = Node.Document; -const Frame = lp.Frame; +const Allocator = std.mem.Allocator; const Evaluator = @This(); @@ -62,7 +64,7 @@ frame: *Frame, /// Public entry. Returns the AST's value; node-sets are sorted into /// document order before return per XPath spec §3.3. 
-pub fn evaluate(arena: Allocator, frame: *Frame, expr: *const Ast.Expr, context_node: *Node) Error!Result.Result { +pub fn evaluate(arena: Allocator, expr: *const Ast.Expr, context_node: *Node, frame: *Frame) Error!Result.Result { var ev = Evaluator{ .arena = arena, .frame = frame }; const result = try ev.evalExpr(expr, context_node, 1, 1); if (result == .node_set) { @@ -77,9 +79,9 @@ pub const SearchError = Error || Parser.Error; /// evaluate and unwrap the node-set. Top-level scalar expressions yield /// an empty slice (decision #3 — these APIs are for finding nodes, not /// arbitrary computation). -pub fn searchAll(arena: Allocator, frame: *Frame, root: *Node, expression: []const u8) SearchError![]const *Node { +pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: *Frame) SearchError![]const *Node { const expr = try Parser.parse(arena, expression); - return switch (try evaluate(arena, frame, expr, root)) { + return switch (try evaluate(arena, expr, root, frame)) { .node_set => |ns| ns, else => &.{}, }; @@ -506,7 +508,7 @@ fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: us const eval_args = try self.arena.alloc(Result.Result, fc.args.len); for (fc.args, 0..) |a, i| eval_args[i] = try self.evalExpr(a, ctx, pos, size); - return Functions.call(self.arena, self.frame, fc.name, eval_args, ctx); + return Functions.call(self.arena, fc.name, eval_args, ctx, self.frame); } // ----- helpers ----- @@ -726,7 +728,7 @@ test "Evaluator: searchAll on scalar expression returns empty (decision #3)" { // the Frame or the context node. Adding a DOM-touching expression // (e.g. `id('x')`) to this list would crash on dereference. 
inline for (.{ "1 + 2", "'hello'", "true()", "1 = 1" }) |expr| { - const nodes = try searchAll(a, @ptrFromInt(0x1000), @ptrFromInt(0x2000), expr); + const nodes = try searchAll(a, @ptrFromInt(0x2000), expr, @ptrFromInt(0x1000)); try testing.expectEqual(@as(usize, 0), nodes.len); } } diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig index 88d25b26..aa969e3f 100644 --- a/src/browser/xpath/Parser.zig +++ b/src/browser/xpath/Parser.zig @@ -25,11 +25,12 @@ //! and is valid for as long as the arena and input outlive it. const std = @import("std"); -const Allocator = std.mem.Allocator; const Tokenizer = @import("Tokenizer.zig"); +const Ast = @import("ast.zig"); + const Token = Tokenizer.Token; -const Ast = @import("Ast.zig"); +const Allocator = std.mem.Allocator; const Parser = @This(); diff --git a/src/browser/xpath/Ast.zig b/src/browser/xpath/ast.zig similarity index 100% rename from src/browser/xpath/Ast.zig rename to src/browser/xpath/ast.zig diff --git a/src/browser/xpath/Functions.zig b/src/browser/xpath/functions.zig similarity index 98% rename from src/browser/xpath/Functions.zig rename to src/browser/xpath/functions.zig index d0ae7eac..52cb4d14 100644 --- a/src/browser/xpath/Functions.zig +++ b/src/browser/xpath/functions.zig @@ -37,14 +37,16 @@ //! Allocations land in the caller's per-evaluation arena. const std = @import("std"); -const Allocator = std.mem.Allocator; const lp = @import("lightpanda"); -const Result = @import("Result.zig"); const Node = @import("../webapi/Node.zig"); + +const Result = @import("result.zig"); + +const Frame = lp.Frame; const Element = Node.Element; const Document = Node.Document; -const Frame = lp.Frame; +const Allocator = std.mem.Allocator; pub const Error = error{ OutOfMemory, @@ -59,14 +61,14 @@ pub const Error = error{ /// last lookup stop. 
pub fn call( arena: Allocator, - frame: *Frame, name: []const u8, args: []const Result.Result, ctx: *Node, + frame: *Frame, ) Error!Result.Result { // -- Node-set -- if (eql(name, "count")) return .{ .number = countFn(args) }; - if (eql(name, "id")) return idFn(arena, frame, args, ctx); + if (eql(name, "id")) return idFn(arena, args, ctx, frame); if (eql(name, "local-name")) return .{ .string = try localNameFn(arena, args, ctx) }; if (eql(name, "name")) return .{ .string = try nameFn(arena, args, ctx) }; if (eql(name, "namespace-uri")) return .{ .string = "" }; @@ -111,7 +113,7 @@ fn countFn(args: []const Result.Result) f64 { return @floatFromInt(args[0].node_set.len); } -fn idFn(arena: Allocator, frame: *Frame, args: []const Result.Result, ctx: *Node) Error!Result.Result { +fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame) Error!Result.Result { if (args.len == 0) return .{ .node_set = &.{} }; // Polyfill: node-set arg → join `stringVal(n)` of each by ' '. Scalar @@ -345,7 +347,7 @@ fn evalScalar(a: Allocator, src: []const u8) !Result.Result { // Synthetic Frame/Node pointers — the public `evaluate` entry only // touches the Frame for path/axis evaluation. Pure-scalar expressions // (arithmetic, function calls returning scalars) never deref it. - return Evaluator.evaluate(a, @ptrFromInt(0x1000), expr, @ptrFromInt(0x2000)); + return Evaluator.evaluate(a, expr, @ptrFromInt(0x2000), @ptrFromInt(0x1000)); } test "Functions: count() of non-node-set returns 0" { diff --git a/src/browser/xpath/Result.zig b/src/browser/xpath/result.zig similarity index 99% rename from src/browser/xpath/Result.zig rename to src/browser/xpath/result.zig index c0822054..e71efe83 100644 --- a/src/browser/xpath/Result.zig +++ b/src/browser/xpath/result.zig @@ -26,10 +26,11 @@ //! shortcuts inherited from the polyfill (decision #2). 
const std = @import("std"); -const Allocator = std.mem.Allocator; const Node = @import("../webapi/Node.zig"); + const CData = Node.CData; +const Allocator = std.mem.Allocator; pub const Result = union(enum) { /// Owned by the evaluator's arena. Order is significant only at the diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig index 843cc20e..32af266c 100644 --- a/src/cdp/domains/dom.zig +++ b/src/cdp/domains/dom.zig @@ -130,7 +130,7 @@ fn performSearch(cmd: *CDP.Command) !void { if (isXPathQuery(params.query)) { const arena = try frame.getArena(.medium, "DOM.performSearch"); defer frame.releaseArena(arena); - const nodes = try xpath.searchAll(arena, frame, root, params.query); + const nodes = try xpath.searchAll(arena, root, params.query, frame); return finishSearch(cmd, bc, nodes); } From 379664044e8377f77d818bd4fea78596e5823227 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 18:21:34 +0200 Subject: [PATCH 07/34] xpath: apply review correctness feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Document.evaluate / XPathEvaluator.evaluate / XPathExpression.evaluate: result_type / requested_type now optional u16 defaulting to ANY_TYPE (matches WHATWG: `optional unsigned short type = 0`). context_node stays nullable with a fallback to the document — preserves the polyfill's behavior asserted by the `default_context` fixture - ast.zig NodeTest: clarify that namespaced names (`prefix:*`, `prefix:local`) are stored verbatim and fall through to a literal match against the node name — consistent with the `namespace::` axis stub (decision #3). Adds a TODO for if the polyfill ever drops the stub - Parser: cap recursive descent at depth 64 with new error.MaxDepthExceeded; depth tracked across parseExpr (parens, predicates, function args) and parseUnaryExpr (chained `-`). 
Two regression tests cover deep parenthesization and deep unary minus --- src/browser/webapi/Document.zig | 8 ++++-- src/browser/webapi/XPathEvaluator.zig | 4 +-- src/browser/webapi/XPathExpression.zig | 4 +-- src/browser/xpath/Parser.zig | 34 ++++++++++++++++++++++++++ src/browser/xpath/ast.zig | 11 +++++---- 5 files changed, 50 insertions(+), 11 deletions(-) diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 095fa48f..9e40d793 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -419,17 +419,21 @@ pub fn evaluate( expression: []const u8, context_node: ?*Node, resolver: ?js.Function, - result_type: u16, + result_type: ?u16, result: ?*XPathResult, frame: *Frame, ) !*XPathResult { // resolver/result are no-ops in HTML mode (decision #2). + // Null/missing context_node falls back to the document — matches the + // polyfill (decision #2). Firefox throws TypeError on a *missing* + // arg, but the bridge can't distinguish "missing" from "explicit + // null" here, so polyfill parity wins for the ambiguity. _ = resolver; _ = result; return XPathResult.fromExpression( expression, context_node orelse self.asNode(), - result_type, + result_type orelse XPathResult.ANY_TYPE, frame, ); } diff --git a/src/browser/webapi/XPathEvaluator.zig b/src/browser/webapi/XPathEvaluator.zig index ec651de0..7cae18b9 100644 --- a/src/browser/webapi/XPathEvaluator.zig +++ b/src/browser/webapi/XPathEvaluator.zig @@ -46,7 +46,7 @@ pub fn evaluate( expression: []const u8, context_node: *Node, resolver: ?js.Function, - requested_type: u16, + requested_type: ?u16, result: ?*XPathResult, frame: *Frame, ) !*XPathResult { @@ -55,7 +55,7 @@ pub fn evaluate( // allocates a fresh instance. 
_ = resolver; _ = result; - return XPathResult.fromExpression(expression, context_node, requested_type, frame); + return XPathResult.fromExpression(expression, context_node, requested_type orelse XPathResult.ANY_TYPE, frame); } pub fn createExpression( diff --git a/src/browser/webapi/XPathExpression.zig b/src/browser/webapi/XPathExpression.zig index b24b6268..d801ac5a 100644 --- a/src/browser/webapi/XPathExpression.zig +++ b/src/browser/webapi/XPathExpression.zig @@ -64,7 +64,7 @@ pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression { pub fn evaluate( self: *XPathExpression, context_node: *Node, - requested_type: u16, + requested_type: ?u16, result: ?*XPathResult, frame: *Frame, ) !*XPathResult { @@ -77,7 +77,7 @@ pub fn evaluate( errdefer frame.releaseArena(arena); const eval_result = try xpath.Evaluator.evaluate(arena, self._expr, context_node, frame); - return XPathResult.fromResult(arena, requested_type, eval_result); + return XPathResult.fromResult(arena, requested_type orelse XPathResult.ANY_TYPE, eval_result); } pub fn deinit(self: *XPathExpression, page: *Page) void { diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig index aa969e3f..b1a841d2 100644 --- a/src/browser/xpath/Parser.zig +++ b/src/browser/xpath/Parser.zig @@ -39,11 +39,18 @@ pub const Error = error{ UnexpectedToken, ExpectedNodeTest, ExpectedPrimaryExpr, + MaxDepthExceeded, }; +/// Cap recursive descent to keep adversarial input (e.g. `(((((...)))))`, +/// `------5`) from blowing the stack. Real XPath expressions never come +/// close to this; browsers typically allow several hundred. 
+const max_depth: u16 = 64; + arena: Allocator, tokens: []const Token, pos: usize = 0, +depth: u16 = 0, pub fn parse(arena: Allocator, input: []const u8) Error!*Ast.Expr { var token_list: std.ArrayList(Token) = .empty; @@ -125,6 +132,9 @@ fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr // Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path fn parseExpr(self: *Parser) Error!*Ast.Expr { + if (self.depth >= max_depth) return error.MaxDepthExceeded; + self.depth += 1; + defer self.depth -= 1; return self.parseOrExpr(); } @@ -190,6 +200,9 @@ fn parseMultExpr(self: *Parser) Error!*Ast.Expr { fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr { if (self.match(.minus)) { + if (self.depth >= max_depth) return error.MaxDepthExceeded; + self.depth += 1; + defer self.depth -= 1; const operand = try self.parseUnaryExpr(); return try self.makeExpr(.{ .neg = operand }); } @@ -922,3 +935,24 @@ test "XPath.Parser: 91-case gem battery — every expression parses" { }; } } + +test "XPath.Parser: deep parenthesization rejected past max_depth" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + try buf.appendNTimes(testing.allocator, '(', max_depth + 1); + try buf.append(testing.allocator, '1'); + try buf.appendNTimes(testing.allocator, ')', max_depth + 1); + try testing.expectError(error.MaxDepthExceeded, parse(arena.allocator(), buf.items)); +} + +test "XPath.Parser: deep unary minus rejected past max_depth" { + var arena = std.heap.ArenaAllocator.init(testing.allocator); + defer arena.deinit(); + var buf: std.ArrayList(u8) = .empty; + defer buf.deinit(testing.allocator); + try buf.appendNTimes(testing.allocator, '-', max_depth + 1); + try buf.append(testing.allocator, '1'); + try testing.expectError(error.MaxDepthExceeded, parse(arena.allocator(), buf.items)); +} diff --git a/src/browser/xpath/ast.zig 
b/src/browser/xpath/ast.zig index 00125e33..91e5a634 100644 --- a/src/browser/xpath/ast.zig +++ b/src/browser/xpath/ast.zig @@ -114,11 +114,12 @@ pub const Axis = enum { }; pub const NodeTest = union(enum) { - /// Element / attribute name. Special values: - /// - "*" → wildcard - /// - "prefix:*" → namespace wildcard - /// - "prefix:local" → namespace-prefixed name - /// The evaluator splits these. + /// Element / attribute name. `"*"` is the wildcard. Namespaced forms + /// (`prefix:*`, `prefix:local`) are stored verbatim — the evaluator + /// does not split them, so they fall through to a literal `mem.eql` + /// against the node name (consistent with the `namespace::` axis stub + /// per decision #3). + /// TODO: real namespace support if the polyfill ever drops the stub. name: []const u8, /// `node()`, `text()`, `comment()`, `processing-instruction()`. /// The optional target literal of `processing-instruction("foo")` From c4c700f7abb1bb726d738733ebec57239b72a9ce Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 19:19:00 +0200 Subject: [PATCH 08/34] xpath: id-lookup fast path + perf benchmark evalPath recognizes //tag[@id='x'] and .//tag[@id='x'] (plus the //*[@id='x'] wildcard) and serves them via frame.getElementByIdFromNode. ~100-150x speedup on ID lookups (3231us -> 22.6us for //*[@id='target'] in the new benchmark). Falls through to general path on any deviation (extra step, extra predicate, non-eq, non-literal RHS). Inherits the same duplicate-ID compromise selector/List.zig ships for querySelector(All): the id-map stores only the first element per ID in document order. Capybara/Selenium hot paths assume unique IDs. tests/xpath/xpath_perf.html is the 13-query micro-benchmark used to collect the numbers; batched console.warn output survives test runner interleaving. 
--- src/browser/tests/xpath/xpath_perf.html | 171 ++++++++++++++++++++++++ src/browser/webapi/XPathResult.zig | 4 + src/browser/xpath/Evaluator.zig | 121 +++++++++++++++++ 3 files changed, 296 insertions(+) create mode 100644 src/browser/tests/xpath/xpath_perf.html diff --git a/src/browser/tests/xpath/xpath_perf.html b/src/browser/tests/xpath/xpath_perf.html new file mode 100644 index 00000000..0d31e052 --- /dev/null +++ b/src/browser/tests/xpath/xpath_perf.html @@ -0,0 +1,171 @@ + + + + XPath perf benchmark + + + + + + + + + + diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 44c29b44..2845480e 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -281,3 +281,7 @@ test "WebApi: XPathResult" { test "WebApi: XPath conformance" { try testing.htmlRunner("xpath/xpath_conformance.html", .{}); } + +test "WebApi: XPath perf" { + try testing.htmlRunner("xpath/xpath_perf.html", .{}); +} diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index d654ed8f..1e4b1b13 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -108,6 +108,8 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz } fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { + if (try self.tryIdLookupFastPath(path, ctx)) |result| return result; + const start: *Node = if (path.absolute) blk: { if (ctx._type == .document) break :blk ctx; const owner = ctx.ownerDocument(self.frame) orelse break :blk ctx; @@ -125,6 +127,125 @@ fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { return .{ .node_set = current_set }; } +// Recognize the very common `//tag[@id='x']` and `.//tag[@id='x']` +// shapes (and their wildcard `//*[@id='x']` variants) and serve them +// directly from `frame.getElementByIdFromNode`. Accepts the literal on +// either side of `=`. 
+// +// Mirrors the same tradeoff `webapi/selector/List.zig:optimizeSelector` +// already makes for `querySelector(All)`: the id-map only stores the +// first element per ID in document order, so duplicate IDs (invalid +// HTML, but possible) yield one match here where a strict tree walk +// would find all. Acceptable because Capybara/Selenium hot paths +// assume unique IDs and CSS has shipped this compromise for years. +// +// Falls through to the general path for any deviation: extra steps, +// extra predicates, non-eq predicate, non-literal RHS, or the +// inability to resolve a search root. +fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { + // Two acceptable AST shapes: + // //tag[@id='x'] parses to: ds::node() / child::tag[pred] + // .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred] + const target: Ast.Step = switch (path.steps.len) { + 2 => blk: { + if (!isDescendantOrSelfNode(path.steps[0])) return null; + break :blk path.steps[1]; + }, + 3 => blk: { + if (!isSelfNode(path.steps[0])) return null; + if (!isDescendantOrSelfNode(path.steps[1])) return null; + break :blk path.steps[2]; + }, + else => return null, + }; + + if (target.axis != .child) return null; + if (target.predicates.len != 1) return null; + + // Tag name (null = wildcard "*"). type_test (e.g. `node()`, + // `text()`) doesn't qualify because getElementByIdFromNode only + // returns elements. + const tag_name: ?[]const u8 = switch (target.node_test) { + .name => |n| if (std.mem.eql(u8, n, "*")) null else n, + .type_test => return null, + }; + + const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null; + + // Resolve search root the same way the general path does. 
+ const search_root: *Node = if (path.absolute) blk: { + if (ctx._type == .document) break :blk ctx; + const owner = ctx.ownerDocument(self.frame) orelse return null; + break :blk owner.asNode(); + } else ctx; + + const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse { + return Result.Result{ .node_set = &.{} }; + }; + const id_node = id_element.asNode(); + + // Relative paths must filter to descendants of the context. + // getElementByIdFromNode is doc-wide. + if (search_root != id_node and !search_root.contains(id_node)) { + return Result.Result{ .node_set = &.{} }; + } + + // Tag check (case-insensitive per decision #2). Element tag names + // are stored lowercase via `getTagNameLower`; lowercase the AST + // name once and compare. + if (tag_name) |tag| { + const lowered = try std.ascii.allocLowerString(self.arena, tag); + if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) { + return Result.Result{ .node_set = &.{} }; + } + } + + const out = try self.arena.alloc(*Node, 1); + out[0] = id_node; + return Result.Result{ .node_set = out }; +} + +fn isDescendantOrSelfNode(s: Ast.Step) bool { + if (s.axis != .descendant_or_self) return false; + if (s.predicates.len != 0) return false; + return switch (s.node_test) { + .type_test => |k| k == .node, + .name => false, + }; +} + +fn isSelfNode(s: Ast.Step) bool { + if (s.axis != .self) return false; + if (s.predicates.len != 0) return false; + return switch (s.node_test) { + .type_test => |k| k == .node, + .name => false, + }; +} + +fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 { + if (expr.* != .binop) return null; + const bo = expr.binop; + if (bo.op != .eq) return null; + if (isAttrPath(bo.left, attr_name) and bo.right.* == .literal) return bo.right.literal; + if (isAttrPath(bo.right, attr_name) and bo.left.* == .literal) return bo.left.literal; + return null; +} + +fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool { + if (expr.* != 
.path) return false; + const p = expr.path; + if (p.absolute) return false; + if (p.steps.len != 1) return false; + const s = p.steps[0]; + if (s.axis != .attribute) return false; + if (s.predicates.len != 0) return false; + return switch (s.node_test) { + .name => |n| std.mem.eql(u8, n, attr_name), + .type_test => false, + }; +} + fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result { const base = try self.evalExpr(fp.filter, ctx, pos, size); if (base != .node_set) return base; From ce722c1f6e4649130b4d73dec1994fcfe0cf7452 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 19:41:53 +0200 Subject: [PATCH 09/34] xpath: extend fast path to non-positional descendant queries Generalizes 8733e33b's //tag[@id='x'] shape: tryFusedDescendantFastPath handles any //tag[safe] or .//tag[safe] where the predicates are non-positional boolean/node-set checks. Walks the search root's descendants once in document order, applies node test + predicates inline, no per-step materialization, no dedup. 5-9x on //div, //*, //*[@class='x'], //div[contains(...)]; ~25x on (//div)[1] and count(//div) where the inner path is the shape. Safety gate rejects predicates that could produce a number at the top level (number, neg, arithmetic binop, numeric-returning fn-call) and any predicate containing position()/last() anywhere. Conservative: a nested sub-path's local positional predicate is rejected even though it's scoped to its own axis. 
--- src/browser/xpath/Evaluator.zig | 158 +++++++++++++++++++++++++++++--- 1 file changed, 146 insertions(+), 12 deletions(-) diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 1e4b1b13..4dfcc85c 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -109,6 +109,7 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { if (try self.tryIdLookupFastPath(path, ctx)) |result| return result; + if (try self.tryFusedDescendantFastPath(path, ctx)) |result| return result; const start: *Node = if (path.absolute) blk: { if (ctx._type == .document) break :blk ctx; @@ -146,18 +147,7 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu // Two acceptable AST shapes: // //tag[@id='x'] parses to: ds::node() / child::tag[pred] // .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred] - const target: Ast.Step = switch (path.steps.len) { - 2 => blk: { - if (!isDescendantOrSelfNode(path.steps[0])) return null; - break :blk path.steps[1]; - }, - 3 => blk: { - if (!isSelfNode(path.steps[0])) return null; - if (!isDescendantOrSelfNode(path.steps[1])) return null; - break :blk path.steps[2]; - }, - else => return null, - }; + const target = matchDescendantPathShape(path) orelse return null; if (target.axis != .child) return null; if (target.predicates.len != 1) return null; @@ -205,6 +195,150 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu return Result.Result{ .node_set = out }; } +// Generalization of `tryIdLookupFastPath` to non-ID predicates. Same +// AST shape (`//tag[preds]` / `.//tag[preds]`), but instead of +// dispatching to `getElementByIdFromNode`, walks the descendants of +// the search root once in document order, applying the node test and +// any "safe" non-positional predicates inline. 
Skips the general path's +// per-step axis materialization, the per-step `filtered`/`current` +// ArrayLists, and the dedup hash map (single-context forward walk +// already preserves doc order). +// +// Hits the bulk of the benchmark's remaining cost: `//div`, `//*`, +// `//*[@class='x']`, `//div[@class='x']`, `//div[contains(@class,'x')]`. +// +// "Safe" predicates: not numeric at the top level (number, neg, +// arithmetic binop, or a fn-call returning a number), and free of +// `position()`/`last()` anywhere in the predicate AST. Numeric predicates +// would need `position()` context which the fused walk doesn't track, +// and a `position()`/`last()` reference inside a sub-path's own step is +// rejected conservatively even though it's local to that sub-axis. +fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { + const target = matchDescendantPathShape(path) orelse return null; + if (target.axis != .child) return null; + + for (target.predicates) |p| { + if (!isSafeNonPositionalPredicate(p)) return null; + } + + const lowered_name: ?[]const u8 = switch (target.node_test) { + .name => |n| if (std.mem.eql(u8, n, "*")) null else try std.ascii.allocLowerString(self.arena, n), + .type_test => null, + }; + + const search_root: *Node = if (path.absolute) blk: { + if (ctx._type == .document) break :blk ctx; + const owner = ctx.ownerDocument(self.frame) orelse return null; + break :blk owner.asNode(); + } else ctx; + + var out: std.ArrayList(*Node) = .empty; + try self.fusedDescend(search_root, target, lowered_name, &out); + return Result.Result{ .node_set = out.items }; +} + +fn fusedDescend( + self: *Evaluator, + parent: *Node, + target: Ast.Step, + lowered_name: ?[]const u8, + out: *std.ArrayList(*Node), +) Error!void { + var it = parent.childrenIterator(); + while (it.next()) |c| { + if (matchTest(c, target.node_test, target.axis, lowered_name)) { + var ok = true; + for (target.predicates) |pred| { + // Position / size 
are synthetic. Safe because the + // predicate-safety gate already rejected any expression + // that depends on either. + const val = try self.evalExpr(pred, c, 1, 1); + if (!Result.toBoolean(val)) { + ok = false; + break; + } + } + if (ok) try out.append(self.arena, c); + } + try self.fusedDescend(c, target, lowered_name, out); + } +} + +fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step { + return switch (path.steps.len) { + 2 => blk: { + if (!isDescendantOrSelfNode(path.steps[0])) break :blk null; + break :blk path.steps[1]; + }, + 3 => blk: { + if (!isSelfNode(path.steps[0])) break :blk null; + if (!isDescendantOrSelfNode(path.steps[1])) break :blk null; + break :blk path.steps[2]; + }, + else => null, + }; +} + +fn isSafeNonPositionalPredicate(expr: *const Ast.Expr) bool { + if (isNumericTopLevel(expr)) return false; + if (containsPositionOrLast(expr)) return false; + return true; +} + +fn isNumericTopLevel(expr: *const Ast.Expr) bool { + return switch (expr.*) { + .number, .neg => true, + .binop => |bo| switch (bo.op) { + .add, .sub, .mul, .div, .mod => true, + else => false, + }, + .fn_call => |fc| isNumericFnName(fc.name), + else => false, + }; +} + +fn isNumericFnName(name: []const u8) bool { + const numeric = [_][]const u8{ + "position", "last", "count", "sum", + "floor", "ceiling", "round", "number", + "string-length", + }; + for (numeric) |n| { + if (std.mem.eql(u8, name, n)) return true; + } + return false; +} + +fn containsPositionOrLast(expr: *const Ast.Expr) bool { + return switch (expr.*) { + .number, .literal, .var_ref => false, + .neg => |inner| containsPositionOrLast(inner), + .binop => |bo| containsPositionOrLast(bo.left) or containsPositionOrLast(bo.right), + .filter => |f| containsPositionOrLast(f.expr) or containsPositionOrLast(f.predicate), + .filter_path => |fp| containsPositionOrLast(fp.filter) or stepsContainPositionOrLast(fp.steps), + .path => |p| stepsContainPositionOrLast(p.steps), + .fn_call => |fc| std.mem.eql(u8, fc.name, 
"position") or + std.mem.eql(u8, fc.name, "last") or + argsContainPositionOrLast(fc.args), + }; +} + +fn stepsContainPositionOrLast(steps: []const Ast.Step) bool { + for (steps) |s| { + for (s.predicates) |p| { + if (containsPositionOrLast(p)) return true; + } + } + return false; +} + +fn argsContainPositionOrLast(args: []const *Ast.Expr) bool { + for (args) |a| { + if (containsPositionOrLast(a)) return true; + } + return false; +} + fn isDescendantOrSelfNode(s: Ast.Step) bool { if (s.axis != .descendant_or_self) return false; if (s.predicates.len != 0) return false; From 9830da04d80e5f4912c63ab878694cbea009619c Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Fri, 8 May 2026 08:22:18 +0800 Subject: [PATCH 10/34] Naming convention fixes Disable xpath_perf benchmark from test run as its quite verbose. --- src/browser/tests/xpath/xpath_perf.html | 4 +- src/browser/webapi/XPathResult.zig | 21 ++-- src/browser/xpath/Evaluator.zig | 140 ++++++++++----------- src/browser/xpath/Parser.zig | 160 ++++++++++++------------ src/browser/xpath/functions.zig | 108 ++++++++-------- 5 files changed, 217 insertions(+), 216 deletions(-) diff --git a/src/browser/tests/xpath/xpath_perf.html b/src/browser/tests/xpath/xpath_perf.html index 0d31e052..21cac3fc 100644 --- a/src/browser/tests/xpath/xpath_perf.html +++ b/src/browser/tests/xpath/xpath_perf.html @@ -16,8 +16,8 @@ mismatch fails the test loudly via testing.fail so a regression in result count can't be hidden by the timing line. 
- Run: make test F=xpath_perf - Filter: make test F=xpath_perf 2>&1 | grep '\[xpath-perf\]' + To run, uncomment the test in XPathResult.zig (bottom of the file), then: + Run: make test F="#xpath_perf" Query shapes target the optimization roadmap: //*[@id='x'] — global ID lookup (fast-path candidate) diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig index 2845480e..c029b48e 100644 --- a/src/browser/webapi/XPathResult.zig +++ b/src/browser/webapi/XPathResult.zig @@ -17,7 +17,7 @@ // along with this program. If not, see . //! WHATWG `XPathResult` (full surface, all 10 type constants — decision -//! #4). Wraps the evaluator's `Result.Result` for JS consumption: +//! #4). Wraps the evaluator's `result.Result` for JS consumption: //! coerces to the requested result type at construction, exposes the //! type-tagged accessors, and serves the iterator/snapshot APIs. //! @@ -44,9 +44,9 @@ const Node = @import("Node.zig"); // XPath runtime helpers. Aliased to keep the cross-directory imports // readable when both modules expose a `Result` type. const xpath = struct { + const result = @import("../xpath/result.zig"); const Parser = @import("../xpath/Parser.zig"); const Evaluator = @import("../xpath/Evaluator.zig"); - const Result = @import("../xpath/result.zig"); }; const Allocator = std.mem.Allocator; @@ -105,14 +105,14 @@ pub fn fromExpression( return fromResult(arena, requested_type, result); } -/// Wrap an already-evaluated `Result.Result` into an XPathResult. The +/// Wrap an already-evaluated `result.Result` into an XPathResult. The /// caller hands over ownership of `arena` — the XPathResult will release /// it on deinit. Used by `XPathExpression.evaluate` (which has its own /// AST cache and only allocates a fresh result arena). 
pub fn fromResult( arena: Allocator, requested_type: u16, - result: xpath.Result.Result, + result: xpath.result.Result, ) !*XPathResult { const value: Value = switch (requested_type) { ANY_TYPE => switch (result) { @@ -121,9 +121,9 @@ pub fn fromResult( .boolean => |b| .{ .boolean = b }, .node_set => |ns| .{ .nodes = ns }, }, - NUMBER_TYPE => .{ .number = try xpath.Result.toNumber(arena, result) }, - STRING_TYPE => .{ .string = try xpath.Result.toString(arena, result) }, - BOOLEAN_TYPE => .{ .boolean = xpath.Result.toBoolean(result) }, + NUMBER_TYPE => .{ .number = try xpath.result.toNumber(arena, result) }, + STRING_TYPE => .{ .string = try xpath.result.toString(arena, result) }, + BOOLEAN_TYPE => .{ .boolean = xpath.result.toBoolean(result) }, UNORDERED_NODE_ITERATOR_TYPE, ORDERED_NODE_ITERATOR_TYPE, UNORDERED_NODE_SNAPSHOT_TYPE, @@ -282,6 +282,7 @@ test "WebApi: XPath conformance" { try testing.htmlRunner("xpath/xpath_conformance.html", .{}); } -test "WebApi: XPath perf" { - try testing.htmlRunner("xpath/xpath_perf.html", .{}); -} +// This uses console.warn, uncomment if you want to run it +// test "WebApi: XPath perf" { +// try testing.htmlRunner("xpath/xpath_perf.html", .{}); +// } diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 4dfcc85c..c0d9ba5e 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -16,7 +16,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -//! XPath 1.0 evaluator — runs an `Ast.Expr` against a context node and +//! XPath 1.0 evaluator — runs an `ast.Expr` against a context node and //! produces a `Result`. Mirrors the polyfill's `evaluate()` and //! `evalStep()` (lib/capybara/lightpanda/javascripts/index.js, lines //! 344–644). 
The evaluator allocates intermediate values (node-set @@ -34,10 +34,10 @@ const lp = @import("lightpanda"); const Node = @import("../webapi/Node.zig"); -const Ast = @import("ast.zig"); +const ast = @import("ast.zig"); const Parser = @import("Parser.zig"); -const Result = @import("result.zig"); -const Functions = @import("functions.zig"); +const result = @import("result.zig"); +const functions = @import("functions.zig"); const Frame = lp.Frame; const Element = Node.Element; @@ -64,13 +64,13 @@ frame: *Frame, /// Public entry. Returns the AST's value; node-sets are sorted into /// document order before return per XPath spec §3.3. -pub fn evaluate(arena: Allocator, expr: *const Ast.Expr, context_node: *Node, frame: *Frame) Error!Result.Result { +pub fn evaluate(arena: Allocator, expr: *const ast.Expr, context_node: *Node, frame: *Frame) Error!result.Result { var ev = Evaluator{ .arena = arena, .frame = frame }; - const result = try ev.evalExpr(expr, context_node, 1, 1); - if (result == .node_set) { - sortDocOrder(@constCast(result.node_set)); + const res = try ev.evalExpr(expr, context_node, 1, 1); + if (res == .node_set) { + sortDocOrder(@constCast(res.node_set)); } - return result; + return res; } pub const SearchError = Error || Parser.Error; @@ -89,14 +89,14 @@ pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: * // ----- AST evaluation ----- -fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalExpr(self: *Evaluator, expr: *const ast.Expr, ctx: *Node, pos: usize, size: usize) Error!result.Result { return switch (expr.*) { .number => |n| .{ .number = n }, .literal => |s| .{ .string = s }, .var_ref => .{ .string = "" }, // decision #3 stub .neg => |inner| blk: { const v = try self.evalExpr(inner, ctx, pos, size); - const n = try Result.toNumber(self.arena, v); + const n = try result.toNumber(self.arena, v); break :blk .{ .number = -n }; }, .binop => |bo| try 
self.evalBinop(bo, ctx, pos, size), @@ -107,9 +107,9 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz }; } -fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { - if (try self.tryIdLookupFastPath(path, ctx)) |result| return result; - if (try self.tryFusedDescendantFastPath(path, ctx)) |result| return result; +fn evalPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!result.Result { + if (try self.tryIdLookupFastPath(path, ctx)) |res| return res; + if (try self.tryFusedDescendantFastPath(path, ctx)) |res| return res; const start: *Node = if (path.absolute) blk: { if (ctx._type == .document) break :blk ctx; @@ -143,7 +143,7 @@ fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { // Falls through to the general path for any deviation: extra steps, // extra predicates, non-eq predicate, non-literal RHS, or the // inability to resolve a search root. -fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { +fn tryIdLookupFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result { // Two acceptable AST shapes: // //tag[@id='x'] parses to: ds::node() / child::tag[pred] // .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred] @@ -170,14 +170,14 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu } else ctx; const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse { - return Result.Result{ .node_set = &.{} }; + return .{ .node_set = &.{} }; }; const id_node = id_element.asNode(); // Relative paths must filter to descendants of the context. // getElementByIdFromNode is doc-wide. if (search_root != id_node and !search_root.contains(id_node)) { - return Result.Result{ .node_set = &.{} }; + return .{ .node_set = &.{} }; } // Tag check (case-insensitive per decision #2). 
Element tag names @@ -186,13 +186,13 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu if (tag_name) |tag| { const lowered = try std.ascii.allocLowerString(self.arena, tag); if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) { - return Result.Result{ .node_set = &.{} }; + return .{ .node_set = &.{} }; } } const out = try self.arena.alloc(*Node, 1); out[0] = id_node; - return Result.Result{ .node_set = out }; + return .{ .node_set = out }; } // Generalization of `tryIdLookupFastPath` to non-ID predicates. Same @@ -213,7 +213,7 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu // would need `position()` context which the fused walk doesn't track, // and a `position()`/`last()` reference inside a sub-path's own step is // rejected conservatively even though it's local to that sub-axis. -fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result { +fn tryFusedDescendantFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result { const target = matchDescendantPathShape(path) orelse return null; if (target.axis != .child) return null; @@ -234,13 +234,13 @@ fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Erro var out: std.ArrayList(*Node) = .empty; try self.fusedDescend(search_root, target, lowered_name, &out); - return Result.Result{ .node_set = out.items }; + return .{ .node_set = out.items }; } fn fusedDescend( self: *Evaluator, parent: *Node, - target: Ast.Step, + target: ast.Step, lowered_name: ?[]const u8, out: *std.ArrayList(*Node), ) Error!void { @@ -253,7 +253,7 @@ fn fusedDescend( // predicate-safety gate already rejected any expression // that depends on either. 
const val = try self.evalExpr(pred, c, 1, 1); - if (!Result.toBoolean(val)) { + if (!result.toBoolean(val)) { ok = false; break; } @@ -264,7 +264,7 @@ fn fusedDescend( } } -fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step { +fn matchDescendantPathShape(path: ast.Path) ?ast.Step { return switch (path.steps.len) { 2 => blk: { if (!isDescendantOrSelfNode(path.steps[0])) break :blk null; @@ -279,13 +279,13 @@ fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step { }; } -fn isSafeNonPositionalPredicate(expr: *const Ast.Expr) bool { +fn isSafeNonPositionalPredicate(expr: *const ast.Expr) bool { if (isNumericTopLevel(expr)) return false; if (containsPositionOrLast(expr)) return false; return true; } -fn isNumericTopLevel(expr: *const Ast.Expr) bool { +fn isNumericTopLevel(expr: *const ast.Expr) bool { return switch (expr.*) { .number, .neg => true, .binop => |bo| switch (bo.op) { @@ -309,7 +309,7 @@ fn isNumericFnName(name: []const u8) bool { return false; } -fn containsPositionOrLast(expr: *const Ast.Expr) bool { +fn containsPositionOrLast(expr: *const ast.Expr) bool { return switch (expr.*) { .number, .literal, .var_ref => false, .neg => |inner| containsPositionOrLast(inner), @@ -323,7 +323,7 @@ fn containsPositionOrLast(expr: *const Ast.Expr) bool { }; } -fn stepsContainPositionOrLast(steps: []const Ast.Step) bool { +fn stepsContainPositionOrLast(steps: []const ast.Step) bool { for (steps) |s| { for (s.predicates) |p| { if (containsPositionOrLast(p)) return true; @@ -332,14 +332,14 @@ fn stepsContainPositionOrLast(steps: []const Ast.Step) bool { return false; } -fn argsContainPositionOrLast(args: []const *Ast.Expr) bool { +fn argsContainPositionOrLast(args: []const *ast.Expr) bool { for (args) |a| { if (containsPositionOrLast(a)) return true; } return false; } -fn isDescendantOrSelfNode(s: Ast.Step) bool { +fn isDescendantOrSelfNode(s: ast.Step) bool { if (s.axis != .descendant_or_self) return false; if (s.predicates.len != 0) return false; return switch 
(s.node_test) { @@ -348,7 +348,7 @@ fn isDescendantOrSelfNode(s: Ast.Step) bool { }; } -fn isSelfNode(s: Ast.Step) bool { +fn isSelfNode(s: ast.Step) bool { if (s.axis != .self) return false; if (s.predicates.len != 0) return false; return switch (s.node_test) { @@ -357,7 +357,7 @@ fn isSelfNode(s: Ast.Step) bool { }; } -fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 { +fn matchAttrEqLiteral(expr: *const ast.Expr, attr_name: []const u8) ?[]const u8 { if (expr.* != .binop) return null; const bo = expr.binop; if (bo.op != .eq) return null; @@ -366,7 +366,7 @@ fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 return null; } -fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool { +fn isAttrPath(expr: *const ast.Expr, attr_name: []const u8) bool { if (expr.* != .path) return false; const p = expr.path; if (p.absolute) return false; @@ -380,7 +380,7 @@ fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool { }; } -fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalFilterPath(self: *Evaluator, fp: ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!result.Result { const base = try self.evalExpr(fp.filter, ctx, pos, size); if (base != .node_set) return base; @@ -392,7 +392,7 @@ fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, return .{ .node_set = current }; } -fn evalFilter(self: *Evaluator, f: Ast.Filter, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalFilter(self: *Evaluator, f: ast.Filter, ctx: *Node, pos: usize, size: usize) Error!result.Result { const base = try self.evalExpr(f.expr, ctx, pos, size); if (base != .node_set) return base; @@ -408,7 +408,7 @@ fn evalFilter(self: *Evaluator, f: Ast.Filter, ctx: *Node, pos: usize, size: usi // ----- step + axis ----- -fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: Ast.Step) Error!Result.Result { 
+fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: ast.Step) Error!result.Result { var dedup: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty; // Pre-lowercase the name test once per step. matchNameTest does @@ -447,7 +447,7 @@ fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: Ast.Step) Error!Re return .{ .node_set = dedup.keys() }; } -fn axisNodes(self: *Evaluator, node: *Node, axis: Ast.Axis) Error![]const *Node { +fn axisNodes(self: *Evaluator, node: *Node, axis: ast.Axis) Error![]const *Node { var out: std.ArrayList(*Node) = .empty; switch (axis) { .child => { @@ -548,7 +548,7 @@ fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) E // ----- node test matching ----- -fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[]const u8) bool { +fn matchTest(node: *Node, test_: ast.NodeTest, axis: ast.Axis, lowered_name: ?[]const u8) bool { return switch (test_) { .type_test => |kind| switch (kind) { .node => true, @@ -563,7 +563,7 @@ fn matchTest(node: *Node, test_: Ast.NodeTest, axis: Ast.Axis, lowered_name: ?[] }; } -fn matchNameTest(node: *Node, name: []const u8, axis: Ast.Axis, lowered_name: ?[]const u8) bool { +fn matchNameTest(node: *Node, name: []const u8, axis: ast.Axis, lowered_name: ?[]const u8) bool { // `lowered_name` is non-null iff `name != "*"`. 
Element tag names // (`getTagNameLower`) and html5ever-stored attribute names are already // lowercase, so a plain `mem.eql` against the pre-lowered test name @@ -583,19 +583,19 @@ fn matchNameTest(node: *Node, name: []const u8, axis: Ast.Axis, lowered_name: ?[ // ----- binop ----- -fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalBinop(self: *Evaluator, bo: ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!result.Result { switch (bo.op) { .or_ => { const l = try self.evalExpr(bo.left, ctx, pos, size); - if (Result.toBoolean(l)) return .{ .boolean = true }; + if (result.toBoolean(l)) return .{ .boolean = true }; const r = try self.evalExpr(bo.right, ctx, pos, size); - return .{ .boolean = Result.toBoolean(r) }; + return .{ .boolean = result.toBoolean(r) }; }, .and_ => { const l = try self.evalExpr(bo.left, ctx, pos, size); - if (!Result.toBoolean(l)) return .{ .boolean = false }; + if (!result.toBoolean(l)) return .{ .boolean = false }; const r = try self.evalExpr(bo.right, ctx, pos, size); - return .{ .boolean = Result.toBoolean(r) }; + return .{ .boolean = result.toBoolean(r) }; }, .eq, .neq, .lt, .gt, .lte, .gte => { const l = try self.evalExpr(bo.left, ctx, pos, size); @@ -605,8 +605,8 @@ fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usiz .add, .sub, .mul, .div, .mod => { const l = try self.evalExpr(bo.left, ctx, pos, size); const r = try self.evalExpr(bo.right, ctx, pos, size); - const ln = try Result.toNumber(self.arena, l); - const rn = try Result.toNumber(self.arena, r); + const ln = try result.toNumber(self.arena, l); + const rn = try result.toNumber(self.arena, r); const v: f64 = switch (bo.op) { .add => ln + rn, .sub => ln - rn, @@ -635,7 +635,7 @@ fn evalBinop(self: *Evaluator, bo: Ast.BinOp, ctx: *Node, pos: usize, size: usiz // ----- comparison (XPath spec §3.4) ----- -fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.BinOpKind) 
Error!bool { +fn xCmp(self: *Evaluator, left: result.Result, right: result.Result, op: ast.BinOpKind) Error!bool { const is_eq = (op == .eq or op == .neq); const l_is_set = (left == .node_set); const r_is_set = (right == .node_set); @@ -646,15 +646,15 @@ fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin // (e.g. `//foo = //bar` on a large page). const right_strings = try self.arena.alloc([]const u8, right.node_set.len); for (right.node_set, 0..) |r, i| { - right_strings[i] = try Result.stringValueOf(self.arena, r); + right_strings[i] = try result.stringValueOf(self.arena, r); } for (left.node_set) |l| { - const lv = try Result.stringValueOf(self.arena, l); + const lv = try result.stringValueOf(self.arena, l); for (right_strings) |rv| { const matched = if (is_eq) cmpString(lv, rv, op) else - cmpNumber(Result.stringToNumber(lv), Result.stringToNumber(rv), op); + cmpNumber(result.stringToNumber(lv), result.stringToNumber(rv), op); if (matched) return true; } } @@ -673,10 +673,10 @@ fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin } for (ns) |n| { - const sv = try Result.stringValueOf(self.arena, n); + const sv = try result.stringValueOf(self.arena, n); const matched = switch (other) { .number => |num| blk: { - const sv_num = Result.stringToNumber(sv); + const sv_num = result.stringToNumber(sv); const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num }; break :blk cmpNumber(a, b, op); }, @@ -685,8 +685,8 @@ fn xCmp(self: *Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin const a, const b = if (ns_left) .{ sv, s } else .{ s, sv }; break :blk cmpString(a, b, op); } - const sv_num = Result.stringToNumber(sv); - const s_num = Result.stringToNumber(s); + const sv_num = result.stringToNumber(sv); + const s_num = result.stringToNumber(s); const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num }; break :blk cmpNumber(a, b, op); }, @@ -700,24 +700,24 @@ fn xCmp(self: 
*Evaluator, left: Result.Result, right: Result.Result, op: Ast.Bin // Neither is a node-set. if (is_eq) { if (left == .boolean or right == .boolean) { - return cmpBool(Result.toBoolean(left), Result.toBoolean(right), op); + return cmpBool(result.toBoolean(left), result.toBoolean(right), op); } if (left == .number or right == .number) { - const ln = try Result.toNumber(self.arena, left); - const rn = try Result.toNumber(self.arena, right); + const ln = try result.toNumber(self.arena, left); + const rn = try result.toNumber(self.arena, right); return cmpNumber(ln, rn, op); } - const ls = try Result.toString(self.arena, left); - const rs = try Result.toString(self.arena, right); + const ls = try result.toString(self.arena, left); + const rs = try result.toString(self.arena, right); return cmpString(ls, rs, op); } // Non-eq with no node-set: both → number. - const ln = try Result.toNumber(self.arena, left); - const rn = try Result.toNumber(self.arena, right); + const ln = try result.toNumber(self.arena, left); + const rn = try result.toNumber(self.arena, right); return cmpNumber(ln, rn, op); } -fn cmpString(a: []const u8, b: []const u8, op: Ast.BinOpKind) bool { +fn cmpString(a: []const u8, b: []const u8, op: ast.BinOpKind) bool { const equal = std.mem.eql(u8, a, b); return switch (op) { .eq => equal, @@ -726,7 +726,7 @@ fn cmpString(a: []const u8, b: []const u8, op: Ast.BinOpKind) bool { }; } -fn cmpNumber(a: f64, b: f64, op: Ast.BinOpKind) bool { +fn cmpNumber(a: f64, b: f64, op: ast.BinOpKind) bool { // Native f64 comparison gives correct NaN semantics: // NaN == X is false, NaN != X is true, NaN < X (etc.) is false. 
return switch (op) { @@ -740,7 +740,7 @@ fn cmpNumber(a: f64, b: f64, op: Ast.BinOpKind) bool { }; } -fn cmpBool(a: bool, b: bool, op: Ast.BinOpKind) bool { +fn cmpBool(a: bool, b: bool, op: ast.BinOpKind) bool { return switch (op) { .eq => a == b, .neq => a != b, @@ -750,9 +750,9 @@ fn cmpBool(a: bool, b: bool, op: Ast.BinOpKind) bool { // ----- function calls ----- -fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!Result.Result { +fn evalFnCall(self: *Evaluator, fc: ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!result.Result { // position()/last() stay here — they need the (pos, size) closure - // that Functions.call doesn't see. Keeping them inline avoids + // that functions.call doesn't see. Keeping them inline avoids // pushing per-call context through Functions' signature. if (std.mem.eql(u8, fc.name, "position")) return .{ .number = @floatFromInt(pos) }; if (std.mem.eql(u8, fc.name, "last")) return .{ .number = @floatFromInt(size) }; @@ -760,20 +760,20 @@ fn evalFnCall(self: *Evaluator, fc: Ast.FnCall, ctx: *Node, pos: usize, size: us // Eagerly evaluate args. Matches the polyfill's `evaluate(args[i], ...)` // pattern; lazy short-circuit isn't needed because `or`/`and` are // binops handled in evalBinop, not function calls. - const eval_args = try self.arena.alloc(Result.Result, fc.args.len); + const eval_args = try self.arena.alloc(result.Result, fc.args.len); for (fc.args, 0..) |a, i| eval_args[i] = try self.evalExpr(a, ctx, pos, size); - return Functions.call(self.arena, fc.name, eval_args, ctx, self.frame); + return functions.call(self.arena, fc.name, eval_args, ctx, self.frame); } // ----- helpers ----- -fn predicateMatches(val: Result.Result, position: usize) bool { +fn predicateMatches(val: result.Result, position: usize) bool { return switch (val) { // Numeric predicate value selects only the node at that position // (1-based). Non-integer numbers never match. 
.number => |n| n == @as(f64, @floatFromInt(position)), - else => Result.toBoolean(val), + else => result.toBoolean(val), }; } diff --git a/src/browser/xpath/Parser.zig b/src/browser/xpath/Parser.zig index b1a841d2..96eda3b4 100644 --- a/src/browser/xpath/Parser.zig +++ b/src/browser/xpath/Parser.zig @@ -20,14 +20,14 @@ //! //! Mirrors the polyfill `Parser.prototype.*` chain in capybara-lightpanda //! (lib/capybara/lightpanda/javascripts/index.js): recursive descent over -//! a fully-tokenized stream, producing an `Ast.Expr` tree allocated on +//! a fully-tokenized stream, producing an `ast.Expr` tree allocated on //! the caller's arena. The AST borrows string/name slices from `input` //! and is valid for as long as the arena and input outlive it. const std = @import("std"); const Tokenizer = @import("Tokenizer.zig"); -const Ast = @import("ast.zig"); +const ast = @import("ast.zig"); const Token = Tokenizer.Token; const Allocator = std.mem.Allocator; @@ -52,7 +52,7 @@ tokens: []const Token, pos: usize = 0, depth: u16 = 0, -pub fn parse(arena: Allocator, input: []const u8) Error!*Ast.Expr { +pub fn parse(arena: Allocator, input: []const u8) Error!*ast.Expr { var token_list: std.ArrayList(Token) = .empty; // Token count is bounded by input length; ¼-byte-per-token is // generous for typical XPath and skips ArrayList regrowth. 
@@ -117,13 +117,13 @@ fn matchKeyword(self: *Parser, keyword: []const u8) bool { return false; } -fn makeExpr(self: *Parser, value: Ast.Expr) Error!*Ast.Expr { - const expr = try self.arena.create(Ast.Expr); +fn makeExpr(self: *Parser, value: ast.Expr) Error!*ast.Expr { + const expr = try self.arena.create(ast.Expr); expr.* = value; return expr; } -fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr) Error!*Ast.Expr { +fn makeBinop(self: *Parser, op: ast.BinOpKind, left: *ast.Expr, right: *ast.Expr) Error!*ast.Expr { return try self.makeExpr(.{ .binop = .{ .op = op, .left = left, .right = right } }); } @@ -131,14 +131,14 @@ fn makeBinop(self: *Parser, op: Ast.BinOpKind, left: *Ast.Expr, right: *Ast.Expr // // Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path -fn parseExpr(self: *Parser) Error!*Ast.Expr { +fn parseExpr(self: *Parser) Error!*ast.Expr { if (self.depth >= max_depth) return error.MaxDepthExceeded; self.depth += 1; defer self.depth -= 1; return self.parseOrExpr(); } -fn parseOrExpr(self: *Parser) Error!*Ast.Expr { +fn parseOrExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseAndExpr(); while (self.matchKeyword("or")) { const right = try self.parseAndExpr(); @@ -147,7 +147,7 @@ fn parseOrExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseAndExpr(self: *Parser) Error!*Ast.Expr { +fn parseAndExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseEqualityExpr(); while (self.matchKeyword("and")) { const right = try self.parseEqualityExpr(); @@ -156,7 +156,7 @@ fn parseAndExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseEqualityExpr(self: *Parser) Error!*Ast.Expr { +fn parseEqualityExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseRelationalExpr(); while (equalityOp(self.peek())) |op| { _ = self.advance(); @@ -166,7 +166,7 @@ fn parseEqualityExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseRelationalExpr(self: *Parser) Error!*Ast.Expr { 
+fn parseRelationalExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseAdditiveExpr(); while (relationalOp(self.peek())) |op| { _ = self.advance(); @@ -176,7 +176,7 @@ fn parseRelationalExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseAdditiveExpr(self: *Parser) Error!*Ast.Expr { +fn parseAdditiveExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseMultExpr(); while (additiveOp(self.peek())) |op| { _ = self.advance(); @@ -188,7 +188,7 @@ fn parseAdditiveExpr(self: *Parser) Error!*Ast.Expr { // After a complete unary expression, `*` is multiply; `div`/`mod` are // operator-position keywords (tokenized as Name). -fn parseMultExpr(self: *Parser) Error!*Ast.Expr { +fn parseMultExpr(self: *Parser) Error!*ast.Expr { var left = try self.parseUnaryExpr(); while (multOp(self.peek())) |op| { _ = self.advance(); @@ -198,7 +198,7 @@ fn parseMultExpr(self: *Parser) Error!*Ast.Expr { return left; } -fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr { +fn parseUnaryExpr(self: *Parser) Error!*ast.Expr { if (self.match(.minus)) { if (self.depth >= max_depth) return error.MaxDepthExceeded; self.depth += 1; @@ -209,7 +209,7 @@ fn parseUnaryExpr(self: *Parser) Error!*Ast.Expr { return self.parseUnionExpr(); } -fn parseUnionExpr(self: *Parser) Error!*Ast.Expr { +fn parseUnionExpr(self: *Parser) Error!*ast.Expr { var left = try self.parsePathExpr(); while (self.match(.pipe)) { const right = try self.parsePathExpr(); @@ -220,7 +220,7 @@ fn parseUnionExpr(self: *Parser) Error!*Ast.Expr { // --- path expressions --- -fn parsePathExpr(self: *Parser) Error!*Ast.Expr { +fn parsePathExpr(self: *Parser) Error!*ast.Expr { const t = self.peek(); if (t == .slash or t == .double_slash) { @@ -245,7 +245,7 @@ fn parsePathExpr(self: *Parser) Error!*Ast.Expr { } if (self.peek() == .slash or self.peek() == .double_slash) { const dsl = self.advance() == .double_slash; - var steps: std.ArrayList(Ast.Step) = .empty; + var steps: std.ArrayList(ast.Step) = .empty; if (dsl) 
try steps.append(self.arena, descendantOrSelfStep()); try self.parseRelStepsInto(&steps); return try self.makeExpr(.{ .filter_path = .{ @@ -259,8 +259,8 @@ fn parsePathExpr(self: *Parser) Error!*Ast.Expr { return self.parseRelPath(); } -fn parseAbsPath(self: *Parser) Error!*Ast.Expr { - var steps: std.ArrayList(Ast.Step) = .empty; +fn parseAbsPath(self: *Parser) Error!*ast.Expr { + var steps: std.ArrayList(ast.Step) = .empty; if (self.match(.double_slash)) { try steps.append(self.arena, descendantOrSelfStep()); try self.parseRelStepsInto(&steps); @@ -275,8 +275,8 @@ fn parseAbsPath(self: *Parser) Error!*Ast.Expr { } }); } -fn parseRelPath(self: *Parser) Error!*Ast.Expr { - var steps: std.ArrayList(Ast.Step) = .empty; +fn parseRelPath(self: *Parser) Error!*ast.Expr { + var steps: std.ArrayList(ast.Step) = .empty; try self.parseRelStepsInto(&steps); return try self.makeExpr(.{ .path = .{ .absolute = false, @@ -284,7 +284,7 @@ fn parseRelPath(self: *Parser) Error!*Ast.Expr { } }); } -fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(Ast.Step)) Error!void { +fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(ast.Step)) Error!void { try steps.append(self.arena, try self.parseStep()); while (self.peek() == .slash or self.peek() == .double_slash) { if (self.advance() == .double_slash) { @@ -301,13 +301,13 @@ fn canStartStep(self: *const Parser) bool { }; } -fn parseStep(self: *Parser) Error!Ast.Step { +fn parseStep(self: *Parser) Error!ast.Step { // Abbreviated steps `.` and `..` carry no axis, node-test, or // predicates — predicates after `.` are a parse error per polyfill. 
if (self.match(.dot)) return abbreviatedStep(.self); if (self.match(.double_dot)) return abbreviatedStep(.parent); - var axis: Ast.Axis = .child; + var axis: ast.Axis = .child; if (self.match(.at)) { axis = .attribute; } else if (self.peek() == .name and self.lookahead(1) == .double_colon) { @@ -318,7 +318,7 @@ fn parseStep(self: *Parser) Error!Ast.Step { const node_test = try self.parseNodeTest(); - var preds: std.ArrayList(*Ast.Expr) = .empty; + var preds: std.ArrayList(*ast.Expr) = .empty; while (self.match(.lbracket)) { const pred = try self.parseExpr(); _ = try self.expect(.rbracket); @@ -328,7 +328,7 @@ fn parseStep(self: *Parser) Error!Ast.Step { return .{ .axis = axis, .node_test = node_test, .predicates = preds.items }; } -fn parseNodeTest(self: *Parser) Error!Ast.NodeTest { +fn parseNodeTest(self: *Parser) Error!ast.NodeTest { if (self.match(.star)) return .{ .name = "*" }; if (self.peek() != .name) return error.ExpectedNodeTest; @@ -349,7 +349,7 @@ fn parseNodeTest(self: *Parser) Error!Ast.NodeTest { return .{ .name = name }; } -fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr { +fn parsePrimaryExpr(self: *Parser) Error!*ast.Expr { switch (self.peek()) { .string => |s| { _ = self.advance(); @@ -373,7 +373,7 @@ fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr { .name => |name| { _ = self.advance(); _ = try self.expect(.lparen); - var args: std.ArrayList(*Ast.Expr) = .empty; + var args: std.ArrayList(*ast.Expr) = .empty; if (self.peek() != .rparen) { try args.append(self.arena, try self.parseExpr()); while (self.match(.comma)) { @@ -389,7 +389,7 @@ fn parsePrimaryExpr(self: *Parser) Error!*Ast.Expr { // --- pure helpers --- -fn equalityOp(t: Token) ?Ast.BinOpKind { +fn equalityOp(t: Token) ?ast.BinOpKind { return switch (t) { .eq => .eq, .neq => .neq, @@ -397,7 +397,7 @@ fn equalityOp(t: Token) ?Ast.BinOpKind { }; } -fn relationalOp(t: Token) ?Ast.BinOpKind { +fn relationalOp(t: Token) ?ast.BinOpKind { return switch (t) { .lt => .lt, .gt => .gt, @@ 
-407,7 +407,7 @@ fn relationalOp(t: Token) ?Ast.BinOpKind { }; } -fn additiveOp(t: Token) ?Ast.BinOpKind { +fn additiveOp(t: Token) ?ast.BinOpKind { return switch (t) { .plus => .add, .minus => .sub, @@ -415,7 +415,7 @@ fn additiveOp(t: Token) ?Ast.BinOpKind { }; } -fn multOp(t: Token) ?Ast.BinOpKind { +fn multOp(t: Token) ?ast.BinOpKind { return switch (t) { .star => .mul, .name => |name| blk: { @@ -427,7 +427,7 @@ fn multOp(t: Token) ?Ast.BinOpKind { }; } -fn descendantOrSelfStep() Ast.Step { +fn descendantOrSelfStep() ast.Step { return .{ .axis = .descendant_or_self, .node_test = .{ .type_test = .node }, @@ -435,7 +435,7 @@ fn descendantOrSelfStep() Ast.Step { }; } -fn abbreviatedStep(axis: Ast.Axis) Ast.Step { +fn abbreviatedStep(axis: ast.Axis) ast.Step { return .{ .axis = axis, .node_test = .{ .type_test = .node }, @@ -447,18 +447,18 @@ fn isNodeTypeName(name: []const u8) bool { return typeTestKind(name) != null; } -const type_test_lookup = std.StaticStringMap(Ast.TypeTest).initComptime(.{ +const type_test_lookup = std.StaticStringMap(ast.TypeTest).initComptime(.{ .{ "node", .node }, .{ "text", .text }, .{ "comment", .comment }, .{ "processing-instruction", .processing_instruction }, }); -fn typeTestKind(name: []const u8) ?Ast.TypeTest { +fn typeTestKind(name: []const u8) ?ast.TypeTest { return type_test_lookup.get(name); } -const axis_lookup = std.StaticStringMap(Ast.Axis).initComptime(.{ +const axis_lookup = std.StaticStringMap(ast.Axis).initComptime(.{ .{ "child", .child }, .{ "descendant", .descendant }, .{ "descendant-or-self", .descendant_or_self }, @@ -474,7 +474,7 @@ const axis_lookup = std.StaticStringMap(Ast.Axis).initComptime(.{ .{ "namespace", .namespace }, }); -fn parseAxisName(name: []const u8) Ast.Axis { +fn parseAxisName(name: []const u8) ast.Axis { return axis_lookup.get(name) orelse .unknown; } @@ -484,7 +484,7 @@ fn parseAxisName(name: []const u8) Ast.Axis { const testing = std.testing; -fn parseFixture(input: []const u8) !struct { arena: 
std.heap.ArenaAllocator, expr: *Ast.Expr } { +fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *ast.Expr } { var arena = std.heap.ArenaAllocator.init(testing.allocator); errdefer arena.deinit(); const expr = try parse(arena.allocator(), input); @@ -538,10 +538,10 @@ test "XPath.Parser: arithmetic precedence — mul binds tighter than add" { defer fx.arena.deinit(); // Expected AST: add(1, mul(2, 3)) const top = fx.expr.binop; - try testing.expectEqual(Ast.BinOpKind.add, top.op); + try testing.expectEqual(ast.BinOpKind.add, top.op); try testing.expectEqual(@as(f64, 1), top.left.number); const mul = top.right.binop; - try testing.expectEqual(Ast.BinOpKind.mul, mul.op); + try testing.expectEqual(ast.BinOpKind.mul, mul.op); try testing.expectEqual(@as(f64, 2), mul.left.number); try testing.expectEqual(@as(f64, 3), mul.right.number); } @@ -551,10 +551,10 @@ test "XPath.Parser: arithmetic left-associativity" { defer fx.arena.deinit(); // Expected AST: sub(sub(1, 2), 3) const top = fx.expr.binop; - try testing.expectEqual(Ast.BinOpKind.sub, top.op); + try testing.expectEqual(ast.BinOpKind.sub, top.op); try testing.expectEqual(@as(f64, 3), top.right.number); const inner = top.left.binop; - try testing.expectEqual(Ast.BinOpKind.sub, inner.op); + try testing.expectEqual(ast.BinOpKind.sub, inner.op); try testing.expectEqual(@as(f64, 1), inner.left.number); try testing.expectEqual(@as(f64, 2), inner.right.number); } @@ -562,21 +562,21 @@ test "XPath.Parser: arithmetic left-associativity" { test "XPath.Parser: div and mod are operator-position keywords" { var fx = try parseFixture("7 div 2"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.BinOpKind.div, fx.expr.binop.op); + try testing.expectEqual(ast.BinOpKind.div, fx.expr.binop.op); var fx2 = try parseFixture("7 mod 2"); defer fx2.arena.deinit(); - try testing.expectEqual(Ast.BinOpKind.mod, fx2.expr.binop.op); + try testing.expectEqual(ast.BinOpKind.mod, fx2.expr.binop.op); } test 
"XPath.Parser: comparison operators" { inline for (.{ - .{ "1 = 2", Ast.BinOpKind.eq }, - .{ "1 != 2", Ast.BinOpKind.neq }, - .{ "1 < 2", Ast.BinOpKind.lt }, - .{ "1 <= 2", Ast.BinOpKind.lte }, - .{ "1 > 2", Ast.BinOpKind.gt }, - .{ "1 >= 2", Ast.BinOpKind.gte }, + .{ "1 = 2", ast.BinOpKind.eq }, + .{ "1 != 2", ast.BinOpKind.neq }, + .{ "1 < 2", ast.BinOpKind.lt }, + .{ "1 <= 2", ast.BinOpKind.lte }, + .{ "1 > 2", ast.BinOpKind.gt }, + .{ "1 >= 2", ast.BinOpKind.gte }, }) |case| { var fx = try parseFixture(case[0]); defer fx.arena.deinit(); @@ -589,8 +589,8 @@ test "XPath.Parser: logical or/and short-circuit chain" { defer fx.arena.deinit(); // Expected AST: or(path(a), and(path(b), path(c))) — and binds tighter const top = fx.expr.binop; - try testing.expectEqual(Ast.BinOpKind.or_, top.op); - try testing.expectEqual(Ast.BinOpKind.and_, top.right.binop.op); + try testing.expectEqual(ast.BinOpKind.or_, top.op); + try testing.expectEqual(ast.BinOpKind.and_, top.right.binop.op); } test "XPath.Parser: unary minus" { @@ -602,7 +602,7 @@ test "XPath.Parser: unary minus" { test "XPath.Parser: union" { var fx = try parseFixture("a | b"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.BinOpKind.union_, fx.expr.binop.op); + try testing.expectEqual(ast.BinOpKind.union_, fx.expr.binop.op); } test "XPath.Parser: absolute path / alone is document root" { @@ -628,8 +628,8 @@ test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" { const path = fx.expr.path; try testing.expect(path.absolute); try testing.expectEqual(@as(usize, 2), path.steps.len); - try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis); - try testing.expectEqual(Ast.TypeTest.node, path.steps[0].node_test.type_test); + try testing.expectEqual(ast.Axis.descendant_or_self, path.steps[0].axis); + try testing.expectEqual(ast.TypeTest.node, path.steps[0].node_test.type_test); try testing.expectEqualStrings("foo", path.steps[1].node_test.name); } @@ -639,7 +639,7 @@ test 
"XPath.Parser: relative path child::foo/bar" { const path = fx.expr.path; try testing.expect(!path.absolute); try testing.expectEqual(@as(usize, 2), path.steps.len); - try testing.expectEqual(Ast.Axis.child, path.steps[0].axis); + try testing.expectEqual(ast.Axis.child, path.steps[0].axis); try testing.expectEqualStrings("foo", path.steps[0].node_test.name); try testing.expectEqualStrings("bar", path.steps[1].node_test.name); } @@ -649,32 +649,32 @@ test "XPath.Parser: abbreviated steps . and .." { defer fx.arena.deinit(); const path = fx.expr.path; try testing.expectEqual(@as(usize, 2), path.steps.len); - try testing.expectEqual(Ast.Axis.self, path.steps[0].axis); - try testing.expectEqual(Ast.Axis.parent, path.steps[1].axis); + try testing.expectEqual(ast.Axis.self, path.steps[0].axis); + try testing.expectEqual(ast.Axis.parent, path.steps[1].axis); } test "XPath.Parser: attribute axis @class" { var fx = try parseFixture("@class"); defer fx.arena.deinit(); const step = fx.expr.path.steps[0]; - try testing.expectEqual(Ast.Axis.attribute, step.axis); + try testing.expectEqual(ast.Axis.attribute, step.axis); try testing.expectEqualStrings("class", step.node_test.name); } test "XPath.Parser: all 12 named axes parse correctly" { inline for (.{ - .{ "child::a", Ast.Axis.child }, - .{ "descendant::a", Ast.Axis.descendant }, - .{ "descendant-or-self::a", Ast.Axis.descendant_or_self }, - .{ "self::a", Ast.Axis.self }, - .{ "parent::a", Ast.Axis.parent }, - .{ "ancestor::a", Ast.Axis.ancestor }, - .{ "ancestor-or-self::a", Ast.Axis.ancestor_or_self }, - .{ "following-sibling::a", Ast.Axis.following_sibling }, - .{ "preceding-sibling::a", Ast.Axis.preceding_sibling }, - .{ "following::a", Ast.Axis.following }, - .{ "preceding::a", Ast.Axis.preceding }, - .{ "namespace::a", Ast.Axis.namespace }, + .{ "child::a", ast.Axis.child }, + .{ "descendant::a", ast.Axis.descendant }, + .{ "descendant-or-self::a", ast.Axis.descendant_or_self }, + .{ "self::a", ast.Axis.self }, + .{ 
"parent::a", ast.Axis.parent }, + .{ "ancestor::a", ast.Axis.ancestor }, + .{ "ancestor-or-self::a", ast.Axis.ancestor_or_self }, + .{ "following-sibling::a", ast.Axis.following_sibling }, + .{ "preceding-sibling::a", ast.Axis.preceding_sibling }, + .{ "following::a", ast.Axis.following }, + .{ "preceding::a", ast.Axis.preceding }, + .{ "namespace::a", ast.Axis.namespace }, }) |case| { var fx = try parseFixture(case[0]); defer fx.arena.deinit(); @@ -685,7 +685,7 @@ test "XPath.Parser: all 12 named axes parse correctly" { test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" { var fx = try parseFixture("wibble::a"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.Axis.unknown, fx.expr.path.steps[0].axis); + try testing.expectEqual(ast.Axis.unknown, fx.expr.path.steps[0].axis); } test "XPath.Parser: wildcard *" { @@ -706,10 +706,10 @@ test "XPath.Parser: namespace-prefixed name and wildcard" { test "XPath.Parser: node-type tests" { inline for (.{ - .{ "node()", Ast.TypeTest.node }, - .{ "text()", Ast.TypeTest.text }, - .{ "comment()", Ast.TypeTest.comment }, - .{ "processing-instruction()", Ast.TypeTest.processing_instruction }, + .{ "node()", ast.TypeTest.node }, + .{ "text()", ast.TypeTest.text }, + .{ "comment()", ast.TypeTest.comment }, + .{ "processing-instruction()", ast.TypeTest.processing_instruction }, }) |case| { var fx = try parseFixture(case[0]); defer fx.arena.deinit(); @@ -720,7 +720,7 @@ test "XPath.Parser: node-type tests" { test "XPath.Parser: processing-instruction with literal target — consumed but ignored" { var fx = try parseFixture("processing-instruction('xml-stylesheet')"); defer fx.arena.deinit(); - try testing.expectEqual(Ast.TypeTest.processing_instruction, fx.expr.path.steps[0].node_test.type_test); + try testing.expectEqual(ast.TypeTest.processing_instruction, fx.expr.path.steps[0].node_test.type_test); } test "XPath.Parser: predicate on step" { @@ -770,7 +770,7 @@ test "XPath.Parser: filter with // tail 
prepends descendant-or-self" { defer fx.arena.deinit(); const fp = fx.expr.filter_path; try testing.expectEqual(@as(usize, 2), fp.steps.len); - try testing.expectEqual(Ast.Axis.descendant_or_self, fp.steps[0].axis); + try testing.expectEqual(ast.Axis.descendant_or_self, fp.steps[0].axis); try testing.expectEqualStrings("b", fp.steps[1].node_test.name); } @@ -788,7 +788,7 @@ test "XPath.Parser: complex representative expression" { const path = fx.expr.path; try testing.expect(path.absolute); try testing.expectEqual(@as(usize, 3), path.steps.len); - try testing.expectEqual(Ast.Axis.descendant_or_self, path.steps[0].axis); + try testing.expectEqual(ast.Axis.descendant_or_self, path.steps[0].axis); try testing.expectEqualStrings("div", path.steps[1].node_test.name); try testing.expectEqual(@as(usize, 1), path.steps[1].predicates.len); try testing.expectEqualStrings("p", path.steps[2].node_test.name); diff --git a/src/browser/xpath/functions.zig b/src/browser/xpath/functions.zig index 52cb4d14..973bd53a 100644 --- a/src/browser/xpath/functions.zig +++ b/src/browser/xpath/functions.zig @@ -41,7 +41,7 @@ const lp = @import("lightpanda"); const Node = @import("../webapi/Node.zig"); -const Result = @import("result.zig"); +const result = @import("result.zig"); const Frame = lp.Frame; const Element = Node.Element; @@ -62,10 +62,10 @@ pub const Error = error{ pub fn call( arena: Allocator, name: []const u8, - args: []const Result.Result, + args: []const result.Result, ctx: *Node, frame: *Frame, -) Error!Result.Result { +) Error!result.Result { // -- Node-set -- if (eql(name, "count")) return .{ .number = countFn(args) }; if (eql(name, "id")) return idFn(arena, args, ctx, frame); @@ -86,8 +86,8 @@ pub fn call( if (eql(name, "translate")) return .{ .string = try translateFn(arena, args) }; // -- Boolean -- - if (eql(name, "boolean")) return .{ .boolean = if (args.len == 0) false else Result.toBoolean(args[0]) }; - if (eql(name, "not")) return .{ .boolean = if (args.len == 0) 
true else !Result.toBoolean(args[0]) }; + if (eql(name, "boolean")) return .{ .boolean = if (args.len == 0) false else result.toBoolean(args[0]) }; + if (eql(name, "not")) return .{ .boolean = if (args.len == 0) true else !result.toBoolean(args[0]) }; if (eql(name, "true")) return .{ .boolean = true }; if (eql(name, "false")) return .{ .boolean = false }; if (eql(name, "lang")) return .{ .boolean = false }; @@ -95,9 +95,9 @@ pub fn call( // -- Number -- if (eql(name, "number")) return .{ .number = try numberFn(arena, args, ctx) }; if (eql(name, "sum")) return .{ .number = try sumFn(arena, args) }; - if (eql(name, "floor")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.floor(try Result.toNumber(arena, args[0])) }; - if (eql(name, "ceiling")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.ceil(try Result.toNumber(arena, args[0])) }; - if (eql(name, "round")) return .{ .number = if (args.len == 0) std.math.nan(f64) else roundHalfToPosInf(try Result.toNumber(arena, args[0])) }; + if (eql(name, "floor")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.floor(try result.toNumber(arena, args[0])) }; + if (eql(name, "ceiling")) return .{ .number = if (args.len == 0) std.math.nan(f64) else std.math.ceil(try result.toNumber(arena, args[0])) }; + if (eql(name, "round")) return .{ .number = if (args.len == 0) std.math.nan(f64) else roundHalfToPosInf(try result.toNumber(arena, args[0])) }; return error.UnknownFunction; } @@ -108,12 +108,12 @@ inline fn eql(a: []const u8, b: []const u8) bool { // ----- node-set fns ----- -fn countFn(args: []const Result.Result) f64 { +fn countFn(args: []const result.Result) f64 { if (args.len == 0 or args[0] != .node_set) return 0; return @floatFromInt(args[0].node_set.len); } -fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame) Error!Result.Result { +fn idFn(arena: Allocator, args: []const result.Result, ctx: *Node, frame: *Frame) 
Error!result.Result { if (args.len == 0) return .{ .node_set = &.{} }; // Polyfill: node-set arg → join `stringVal(n)` of each by ' '. Scalar @@ -123,12 +123,12 @@ fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame var buf = std.Io.Writer.Allocating.init(arena); for (args[0].node_set, 0..) |n, i| { if (i > 0) try buf.writer.writeByte(' '); - const sv = try Result.stringValueOf(arena, n); + const sv = try result.stringValueOf(arena, n); try buf.writer.writeAll(sv); } break :blk buf.written(); } - break :blk try Result.toString(arena, args[0]); + break :blk try result.toString(arena, args[0]); }; // `ctx.ownerDocument || ctx` — document nodes own themselves. @@ -144,7 +144,7 @@ fn idFn(arena: Allocator, args: []const Result.Result, ctx: *Node, frame: *Frame return .{ .node_set = seen.keys() }; } -fn localNameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { +fn localNameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { const node = firstNodeOrCtx(args, ctx) orelse return ""; // For Element, `getLocalName` returns a slice into `_tag_name` // (lowercase, namespace-prefix stripped) — lifetime exceeds the @@ -154,7 +154,7 @@ fn localNameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error! return std.ascii.allocLowerString(arena, node.getNodeName(&buf)); } -fn nameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { +fn nameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { const node = firstNodeOrCtx(args, ctx) orelse return ""; // Diverges from `local-name` only on namespaced elements: `name` // keeps the prefix (`ns:foo`), `local-name` strips it (`foo`). 
@@ -163,7 +163,7 @@ fn nameFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]con return std.ascii.allocLowerString(arena, node.getNodeName(&buf)); } -fn firstNodeOrCtx(args: []const Result.Result, ctx: *Node) ?*Node { +fn firstNodeOrCtx(args: []const result.Result, ctx: *Node) ?*Node { if (args.len == 0) return ctx; if (args[0] != .node_set) return null; if (args[0].node_set.len == 0) return null; @@ -172,64 +172,64 @@ fn firstNodeOrCtx(args: []const Result.Result, ctx: *Node) ?*Node { // ----- string fns ----- -fn stringFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { - if (args.len == 0) return try Result.stringValueOf(arena, ctx); - return try Result.toString(arena, args[0]); +fn stringFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { + if (args.len == 0) return try result.stringValueOf(arena, ctx); + return try result.toString(arena, args[0]); } -fn concatFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn concatFn(arena: Allocator, args: []const result.Result) Error![]const u8 { var buf = std.Io.Writer.Allocating.init(arena); for (args) |a| { - const s = try Result.toString(arena, a); + const s = try result.toString(arena, a); try buf.writer.writeAll(s); } return buf.written(); } -fn startsWithFn(arena: Allocator, args: []const Result.Result) Error!bool { +fn startsWithFn(arena: Allocator, args: []const result.Result) Error!bool { if (args.len < 2) return false; - const s1 = try Result.toString(arena, args[0]); - const s2 = try Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); return std.mem.startsWith(u8, s1, s2); } -fn containsFn(arena: Allocator, args: []const Result.Result) Error!bool { +fn containsFn(arena: Allocator, args: []const result.Result) Error!bool { if (args.len < 2) return false; - const s1 = try Result.toString(arena, args[0]); - const s2 = try 
Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); return std.mem.indexOf(u8, s1, s2) != null; } -fn substringBeforeFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn substringBeforeFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 2) return ""; - const s1 = try Result.toString(arena, args[0]); - const s2 = try Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); if (std.mem.indexOf(u8, s1, s2)) |idx| { return s1[0..idx]; } return ""; } -fn substringAfterFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn substringAfterFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 2) return ""; - const s1 = try Result.toString(arena, args[0]); - const s2 = try Result.toString(arena, args[1]); + const s1 = try result.toString(arena, args[0]); + const s2 = try result.toString(arena, args[1]); if (std.mem.indexOf(u8, s1, s2)) |idx| { return s1[idx + s2.len ..]; } return ""; } -fn substringFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn substringFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 2) return ""; - const s = try Result.toString(arena, args[0]); - const start_raw = try Result.toNumber(arena, args[1]); + const s = try result.toString(arena, args[0]); + const start_raw = try result.toNumber(arena, args[1]); if (std.math.isNan(start_raw)) return ""; const start = roundHalfToPosInf(start_raw); const s_len: f64 = @floatFromInt(s.len); if (args.len >= 3) { - const len_raw = try Result.toNumber(arena, args[2]); + const len_raw = try result.toNumber(arena, args[2]); if (std.math.isNan(len_raw)) return ""; const len = roundHalfToPosInf(len_raw); const sum = start - 1 + len; @@ -249,22 +249,22 @@ fn substringFn(arena: Allocator, args: []const 
Result.Result) Error![]const u8 { return s[si..]; } -fn stringLengthFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error!f64 { +fn stringLengthFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 { const s = if (args.len == 0) - try Result.stringValueOf(arena, ctx) + try result.stringValueOf(arena, ctx) else - try Result.toString(arena, args[0]); + try result.toString(arena, args[0]); // Polyfill returns UTF-16 code units; we return UTF-8 bytes. They // agree on ASCII (the gem's 91-case battery is ASCII-only). See // .claude/skills/xpath-port/NOTES.md for the divergence rationale. return @floatFromInt(s.len); } -fn normalizeSpaceFn(arena: Allocator, args: []const Result.Result, ctx: *Node) Error![]const u8 { +fn normalizeSpaceFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 { const s = if (args.len == 0) - try Result.stringValueOf(arena, ctx) + try result.stringValueOf(arena, ctx) else - try Result.toString(arena, args[0]); + try result.toString(arena, args[0]); const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace); if (trimmed.len == 0) return ""; @@ -283,11 +283,11 @@ fn normalizeSpaceFn(arena: Allocator, args: []const Result.Result, ctx: *Node) E return buf.written(); } -fn translateFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { +fn translateFn(arena: Allocator, args: []const result.Result) Error![]const u8 { if (args.len < 3) return ""; - const s = try Result.toString(arena, args[0]); - const from = try Result.toString(arena, args[1]); - const to = try Result.toString(arena, args[2]); + const s = try result.toString(arena, args[0]); + const from = try result.toString(arena, args[1]); + const to = try result.toString(arena, args[2]); var buf = std.Io.Writer.Allocating.init(arena); for (s) |c| { @@ -303,20 +303,20 @@ fn translateFn(arena: Allocator, args: []const Result.Result) Error![]const u8 { // ----- number fns ----- -fn numberFn(arena: Allocator, args: []const 
Result.Result, ctx: *Node) Error!f64 { +fn numberFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 { if (args.len == 0) { - const sv = try Result.stringValueOf(arena, ctx); - return Result.stringToNumber(sv); + const sv = try result.stringValueOf(arena, ctx); + return result.stringToNumber(sv); } - return try Result.toNumber(arena, args[0]); + return try result.toNumber(arena, args[0]); } -fn sumFn(arena: Allocator, args: []const Result.Result) Error!f64 { +fn sumFn(arena: Allocator, args: []const result.Result) Error!f64 { if (args.len == 0 or args[0] != .node_set) return std.math.nan(f64); var total: f64 = 0; for (args[0].node_set) |n| { - const sv = try Result.stringValueOf(arena, n); - total += Result.stringToNumber(sv); + const sv = try result.stringValueOf(arena, n); + total += result.stringToNumber(sv); } return total; } @@ -342,7 +342,7 @@ const Tokenizer = @import("Tokenizer.zig"); const Parser = @import("Parser.zig"); const Evaluator = @import("Evaluator.zig"); -fn evalScalar(a: Allocator, src: []const u8) !Result.Result { +fn evalScalar(a: Allocator, src: []const u8) !result.Result { const expr = try Parser.parse(a, src); // Synthetic Frame/Node pointers — the public `evaluate` entry only // touches the Frame for path/axis evaluation. Pure-scalar expressions From 0b0a34c4a24be37e8158129c84627c3a5ab052e9 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Fri, 8 May 2026 08:42:07 +0200 Subject: [PATCH 11/34] cdp: match closed set of axis names in isXPathQuery The previous `::` heuristic accepted any identifier-like character before `::`, which misrouted CSS pseudo-elements (`a::before`, `div::after`) to the XPath evaluator. Walk back the run of [a-zA-Z-] characters and look the candidate up in a StaticStringMap of the 13 XPath 1.0 named axes, so only real axis names match. 
--- src/cdp/domains/dom.zig | 45 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/src/cdp/domains/dom.zig b/src/cdp/domains/dom.zig index 32af266c..9f7e9eac 100644 --- a/src/cdp/domains/dom.zig +++ b/src/cdp/domains/dom.zig @@ -92,6 +92,26 @@ fn getDocument(cmd: *CDP.Command) !void { return cmd.sendResult(.{ .root = bc.nodeWriter(node, .{ .depth = params.depth }) }, .{}); } +// Closed set of XPath 1.0 named axes. Matched literally before `::` so +// CSS pseudo-elements (`a::before`, `div::first-line`) don't get +// misrouted to the XPath evaluator just because they have an +// identifier-looking word before `::`. +const xpath_axis_names = std.StaticStringMap(void).initComptime(.{ + .{ "child", {} }, + .{ "descendant", {} }, + .{ "descendant-or-self", {} }, + .{ "self", {} }, + .{ "parent", {} }, + .{ "ancestor", {} }, + .{ "ancestor-or-self", {} }, + .{ "following-sibling", {} }, + .{ "preceding-sibling", {} }, + .{ "following", {} }, + .{ "preceding", {} }, + .{ "attribute", {} }, + .{ "namespace", {} }, +}); + // Polyfill-parity heuristic (decision #2/#9): treat the query as XPath // when it begins with a path operator or contains an axis specifier; // otherwise fall through to CSS. Lifted from capybara-lightpanda's @@ -104,14 +124,21 @@ fn isXPathQuery(q: []const u8) bool { if (q[1] == '/') return true; if (q[1] == '.' and q.len > 2 and q[2] == '/') return true; } - // Require axis-name shape immediately before `::` so CSS pseudo-elements - // (`a::before`) and attribute values containing `::` (`[data-x="x::y"]`) - // aren't misrouted to the XPath evaluator. + // For `::` to be an XPath axis separator, the identifier immediately + // before it must be one of the 13 named axes. Walk back the run of + // [a-zA-Z-] characters and look it up in the closed set. 
var idx: usize = 0; while (std.mem.indexOfPos(u8, q, idx, "::")) |hit| : (idx = hit + 1) { if (hit == 0) continue; - const c = q[hit - 1]; - if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '-') return true; + var start = hit; + while (start > 0) { + const c = q[start - 1]; + const is_axis_char = (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '-'; + if (!is_axis_char) break; + start -= 1; + } + if (start == hit) continue; + if (xpath_axis_names.has(q[start..hit])) return true; } return false; } @@ -711,6 +738,14 @@ test "cdp.dom: isXPathQuery heuristic" { try std.testing.expect(!isXPathQuery("[data-x]")); try std.testing.expect(!isXPathQuery("(p)")); // parens without path → CSS try std.testing.expect(!isXPathQuery(".x")); // leading dot without / + + // CSS pseudo-elements: identifier before `::` is not an XPath axis name. + try std.testing.expect(!isXPathQuery("a::before")); + try std.testing.expect(!isXPathQuery("div::after")); + try std.testing.expect(!isXPathQuery("p::first-line")); + try std.testing.expect(!isXPathQuery("input::placeholder")); + // Attribute selector with `::` inside a literal — nothing axis-like before it. + try std.testing.expect(!isXPathQuery("[data-x=\"x::y\"]")); } test "cdp.dom: querySelector unknown search id" { From d8b9391e337511f8d1b66d212c34c79549e3d6f4 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Fri, 8 May 2026 08:58:07 +0200 Subject: [PATCH 12/34] xpath: drop internal references from comments Strip mentions of the private gem and its internal paths from xpath module docstrings, the conformance test header, and the dom dispatch heuristic. Comments now describe behavior directly without pointing at sources public readers can't access. 
--- .../tests/xpath/xpath_conformance.html | 5 ++--- src/browser/xpath/Evaluator.zig | 20 +++++++++---------- src/browser/xpath/Parser.zig | 17 ++++++++-------- src/browser/xpath/Tokenizer.zig | 2 -- src/browser/xpath/ast.zig | 6 ++---- src/browser/xpath/functions.zig | 20 +++++++++---------- src/browser/xpath/result.zig | 10 ++++------ src/cdp/domains/dom.zig | 7 +++---- 8 files changed, 37 insertions(+), 50 deletions(-) diff --git a/src/browser/tests/xpath/xpath_conformance.html b/src/browser/tests/xpath/xpath_conformance.html index f200ecbb..7080ecb6 100644 --- a/src/browser/tests/xpath/xpath_conformance.html +++ b/src/browser/tests/xpath/xpath_conformance.html @@ -46,9 +46,8 @@ + + diff --git a/src/browser/tests/element/selector_invalid.html b/src/browser/tests/element/selector_invalid.html index c0d16d59..7112ab85 100644 --- a/src/browser/tests/element/selector_invalid.html +++ b/src/browser/tests/element/selector_invalid.html @@ -10,9 +10,9 @@ const container = $('#container'); // Empty functional pseudo-classes should error - testing.expectError("Error: InvalidPseudoClass", () => container.querySelector(':has()')); - testing.expectError("Error: InvalidPseudoClass", () => container.querySelector(':not()')); - testing.expectError("Error: InvalidPseudoClass", () => container.querySelector(':lang()')); + testing.expectError("SyntaxError", () => container.querySelector(':has()')); + testing.expectError("SyntaxError", () => container.querySelector(':not()')); + testing.expectError("SyntaxError", () => container.querySelector(':lang()')); } @@ -21,9 +21,9 @@ const container = $('#container'); // Invalid nth patterns - testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(foo)')); - testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(-)')); - testing.expectError("Error: InvalidNthPattern", () => container.querySelector(':nth-child(+)')); + testing.expectError("SyntaxError", () => 
container.querySelector(':nth-child(foo)')); + testing.expectError("SyntaxError", () => container.querySelector(':nth-child(-)')); + testing.expectError("SyntaxError", () => container.querySelector(':nth-child(+)')); } @@ -32,9 +32,9 @@ const container = $('#container'); // Unknown pseudo-classes - testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':unknown')); - testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':not-a-real-pseudo')); - testing.expectError("Error: UnknownPseudoClass", () => container.querySelector(':fake(test)')); + testing.expectError("SyntaxError", () => container.querySelector(':unknown')); + testing.expectError("SyntaxError", () => container.querySelector(':not-a-real-pseudo')); + testing.expectError("SyntaxError", () => container.querySelector(':fake(test)')); } @@ -53,8 +53,8 @@ const container = $('#container'); // Combinators with nothing after - testing.expectError("Error: InvalidSelector", () => container.querySelector('p >')); - testing.expectError("Error: InvalidSelector", () => container.querySelector('p +')); - testing.expectError("Error: InvalidSelector", () => container.querySelector('p ~')); + testing.expectError("SyntaxError", () => container.querySelector('p >')); + testing.expectError("SyntaxError", () => container.querySelector('p +')); + testing.expectError("SyntaxError", () => container.querySelector('p ~')); } diff --git a/src/browser/tests/page/meta.html b/src/browser/tests/page/meta.html index 3c03f403..98fb1688 100644 --- a/src/browser/tests/page/meta.html +++ b/src/browser/tests/page/meta.html @@ -30,7 +30,8 @@ testing.expectEqual('undefined', typeof plainDoc.scripts); testing.expectEqual('undefined', typeof plainDoc.links); testing.expectEqual('undefined', typeof plainDoc.forms); - testing.expectEqual('undefined', typeof plainDoc.location); + // location lives on Document (returns null for non-HTMLDocument). 
+ testing.expectEqual(null, plainDoc.location); // Both should have common Document properties testing.expectEqual('string', typeof document.URL); diff --git a/src/browser/webapi/DOMImplementation.zig b/src/browser/webapi/DOMImplementation.zig index 777a9571..280db6a8 100644 --- a/src/browser/webapi/DOMImplementation.zig +++ b/src/browser/webapi/DOMImplementation.zig @@ -78,7 +78,7 @@ pub fn createDocument(_: *const DOMImplementation, namespace_: ?[]const u8, qual // Create and append root element if qualified_name provided if (qualified_name) |qname| { if (qname.len > 0) { - const namespace = if (namespace_) |ns| Node.Element.Namespace.parse(ns) else .xml; + const namespace = Node.Element.Namespace.parse(namespace_); const root = try frame.createElementNS(namespace, qname, null); _ = try document.asNode().appendChild(root, frame); } diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 747e08c2..95c57793 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -119,7 +119,18 @@ pub fn asEventTarget(self: *Document) *@import("EventTarget.zig") { } pub fn getURL(self: *const Document, frame: *const Frame) [:0]const u8 { - return self._url orelse frame.url; + return self._url orelse (self._frame orelse frame).url; +} + +pub fn getLocation(self: *const Document) ?*Location { + if (self._type != .html) return null; + const doc_frame = self._frame orelse return null; + return doc_frame.window._location; +} + +pub fn setLocation(self: *Document, url: [:0]const u8, frame: *Frame) !void { + if (self._type != .html) return; + return frame.scheduleNavigation(url, .{ .reason = .script, .kind = .{ .push = null } }, .{ .script = self._frame }); } pub fn getContentType(self: *const Document) []const u8 { @@ -277,11 +288,11 @@ pub fn getSelection(self: *Document) *Selection { } pub fn querySelector(self: *Document, input: String, frame: *Frame) !?*Element { - return Selector.querySelector(self.asNode(), input.str(), 
frame); + return Selector.querySelector(self.asNode(), input.str(), frame) catch |err| Selector.mapErrorToDOM(err); } pub fn querySelectorAll(self: *Document, input: String, frame: *Frame) !*Selector.List { - return Selector.querySelectorAll(self.asNode(), input.str(), frame); + return Selector.querySelectorAll(self.asNode(), input.str(), frame) catch |err| Selector.mapErrorToDOM(err); } pub fn getImplementation(self: *Document, frame: *Frame) !*DOMImplementation { @@ -465,15 +476,21 @@ pub fn getFonts(self: *Document, frame: *Frame) !*FontFaceSet { return fonts; } -pub fn adoptNode(_: *const Document, node: *Node, frame: *Frame) !*Node { +pub fn adoptNode(self: *Document, node: *Node, frame: *Frame) !*Node { if (node._type == .document) { return error.NotSupported; } + const old_owner = node.ownerDocument(frame) orelse frame.document; + if (node._parent) |parent| { frame.removeNode(parent, node, .{ .will_be_reconnected = false }); } + if (old_owner != self) { + try frame.adoptNodeTree(node, old_owner, self); + } + return node; } @@ -1029,6 +1046,7 @@ pub const JsApi = struct { pub const onselectionchange = bridge.accessor(Document.getOnSelectionChange, Document.setOnSelectionChange, .{}); pub const URL = bridge.accessor(Document.getURL, null, .{}); + pub const location = bridge.accessor(Document.getLocation, Document.setLocation, .{}); pub const documentURI = bridge.accessor(Document.getURL, null, .{}); pub const documentElement = bridge.accessor(Document.getDocumentElement, null, .{}); pub const scrollingElement = bridge.accessor(Document.getDocumentElement, null, .{}); diff --git a/src/browser/webapi/DocumentFragment.zig b/src/browser/webapi/DocumentFragment.zig index 186bc68a..b55050f2 100644 --- a/src/browser/webapi/DocumentFragment.zig +++ b/src/browser/webapi/DocumentFragment.zig @@ -84,11 +84,11 @@ pub fn getElementById(self: *DocumentFragment, id: []const u8) ?*Element { } pub fn querySelector(self: *DocumentFragment, selector: []const u8, frame: *Frame) 
!?*Element { - return Selector.querySelector(self.asNode(), selector, frame); + return Selector.querySelector(self.asNode(), selector, frame) catch |err| Selector.mapErrorToDOM(err); } pub fn querySelectorAll(self: *DocumentFragment, input: []const u8, frame: *Frame) !*Selector.List { - return Selector.querySelectorAll(self.asNode(), input, frame); + return Selector.querySelectorAll(self.asNode(), input, frame) catch |err| Selector.mapErrorToDOM(err); } pub fn getChildren(self: *DocumentFragment, frame: *Frame) !collections.NodeLive(.child_elements) { diff --git a/src/browser/webapi/Element.zig b/src/browser/webapi/Element.zig index 4de1a732..058875a6 100644 --- a/src/browser/webapi/Element.zig +++ b/src/browser/webapi/Element.zig @@ -1071,15 +1071,15 @@ pub fn getChildElementCount(self: *Element) usize { } pub fn matches(self: *Element, selector: []const u8, frame: *Frame) !bool { - return Selector.matches(self, selector, frame); + return Selector.matches(self, selector, frame) catch |err| Selector.mapErrorToDOM(err); } pub fn querySelector(self: *Element, selector: []const u8, frame: *Frame) !?*Element { - return Selector.querySelector(self.asNode(), selector, frame); + return Selector.querySelector(self.asNode(), selector, frame) catch |err| Selector.mapErrorToDOM(err); } pub fn querySelectorAll(self: *Element, input: []const u8, frame: *Frame) !*Selector.List { - return Selector.querySelectorAll(self.asNode(), input, frame); + return Selector.querySelectorAll(self.asNode(), input, frame) catch |err| Selector.mapErrorToDOM(err); } pub fn getAnimations(_: *const Element) []*Animation { diff --git a/src/browser/webapi/HTMLDocument.zig b/src/browser/webapi/HTMLDocument.zig index 41782cc8..19e462a1 100644 --- a/src/browser/webapi/HTMLDocument.zig +++ b/src/browser/webapi/HTMLDocument.zig @@ -196,15 +196,6 @@ pub fn getCurrentScript(self: *const HTMLDocument) ?*Element.Html.Script { return self._proto._current_script; } -pub fn getLocation(self: *const HTMLDocument) 
?*@import("Location.zig") { - const frame = self._proto._frame orelse return null; - return frame.window._location; -} - -pub fn setLocation(self: *HTMLDocument, url: [:0]const u8, frame: *Frame) !void { - return frame.scheduleNavigation(url, .{ .reason = .script, .kind = .{ .push = null } }, .{ .script = self._proto._frame }); -} - pub fn getDir(self: *HTMLDocument) []const u8 { const el = self._proto.getDocumentElement() orelse return ""; const html = el.is(Element.Html) orelse return ""; @@ -311,7 +302,6 @@ pub const JsApi = struct { pub const applets = bridge.accessor(HTMLDocument.getApplets, null, .{}); pub const plugins = bridge.accessor(HTMLDocument.getEmbeds, null, .{}); pub const currentScript = bridge.accessor(HTMLDocument.getCurrentScript, null, .{}); - pub const location = bridge.accessor(HTMLDocument.getLocation, HTMLDocument.setLocation, .{}); pub const all = bridge.accessor(HTMLDocument.getAll, null, .{}); pub const cookie = bridge.accessor(HTMLDocument.getCookie, HTMLDocument.setCookie, .{}); pub const doctype = bridge.accessor(HTMLDocument.getDocType, null, .{}); diff --git a/src/browser/webapi/Node.zig b/src/browser/webapi/Node.zig index 7df1fd6a..c26411ed 100644 --- a/src/browser/webapi/Node.zig +++ b/src/browser/webapi/Node.zig @@ -166,7 +166,7 @@ pub fn findAdjacentNodes(self: *Node, position: []const u8) !struct { *Node, ?*N // Returned if: // * position is not one of the four listed values. // * The input is XML that is not well-formed. - return error.Syntax; + return error.SyntaxError; } pub fn firstChild(self: *const Node) ?*Node { diff --git a/src/browser/webapi/selector/Selector.zig b/src/browser/webapi/selector/Selector.zig index 2591ce6c..7322e02e 100644 --- a/src/browser/webapi/selector/Selector.zig +++ b/src/browser/webapi/selector/Selector.zig @@ -28,6 +28,22 @@ pub const List = @import("List.zig"); const String = lp.String; const Allocator = std.mem.Allocator; +// translate a Selector error to a DOMException known type. 
+pub fn mapErrorToDOM(err: anyerror) anyerror { + return switch (err) { + error.InvalidSelector, + error.InvalidAttributeSelector, + error.InvalidIDSelector, + error.InvalidClassSelector, + error.UnknownPseudoClass, + error.InvalidTagSelector, + error.InvalidPseudoClass, + error.InvalidNthPattern, + => error.SyntaxError, + else => err, + }; +} + pub fn parseLeaky(arena: Allocator, input: []const u8) !Parsed { if (input.len == 0) { return error.SyntaxError; From 15101f12e4d548117372f47223f2049c7458d91f Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Sat, 9 May 2026 21:27:30 +0200 Subject: [PATCH 14/34] parser: defer raw-text merge to bound memory growth Frame.appendNew did String.concat(arena, [existing, txt]) every time html5ever flushed a script-data/rawtext chunk on a '<' token, allocating O(N) on the page-lifetime arena per chunk. Total bytes ~= N^2/(2*c). On apple.com US iPhone pages a 347 KB inline JSON literal with embedded HTML strings ballooned the parse to 3.5 GB peak RSS. Move the merge into the parser. Same-parent text chunks accumulate in a std.ArrayListUnmanaged on the per-parse arena; one String.dupe lands the final value on the frame arena. Flush points are the natural ends of a text run: a non-text child appended, a foster/before-sibling insertion, the parent element popping, or the parse call returning. Frame.appendNew now takes *Node directly; it had no non-parser callers. Streaming.done returns !void to propagate the final flush. 
Refs #2397 --- src/browser/Frame.zig | 26 ++--- src/browser/parser/Parser.zig | 95 ++++++++++++++++++- src/browser/tests/cdata/raw_text_chunked.html | 71 ++++++++++++++ src/browser/webapi/Document.zig | 2 +- 4 files changed, 169 insertions(+), 25 deletions(-) create mode 100644 src/browser/tests/cdata/raw_text_chunked.html diff --git a/src/browser/Frame.zig b/src/browser/Frame.zig index dd583022..32230263 100644 --- a/src/browser/Frame.zig +++ b/src/browser/Frame.zig @@ -1813,26 +1813,12 @@ pub fn notifyNetworkAlmostIdle(self: *Frame) void { }); } -// called from the parser -pub fn appendNew(self: *Frame, parent: *Node, child: Node.NodeOrText) !void { - const node = switch (child) { - .node => |n| n, - .text => |txt| blk: { - // If we're appending this adjacently to a text node, we should merge - if (parent.lastChild()) |sibling| { - if (sibling.is(CData.Text)) |tn| { - const cdata = tn._proto; - const existing = cdata.getData().str(); - cdata._data = try String.concat(self.arena, &.{ existing, txt }); - return; - } - } - break :blk try self.createTextNode(txt); - }, - }; - - lp.assert(node._parent == null, "Frame.appendNew", .{}); - try self._insertNodeRelative(true, parent, node, .append, .{ +// called from the parser. Text-node merging is the parser's responsibility +// (see Parser.appendTextChunk in src/browser/parser/Parser.zig); this is the +// "insert this fully-formed node as a new last child of parent" entry point. +pub fn appendNew(self: *Frame, parent: *Node, child: *Node) !void { + lp.assert(child._parent == null, "Frame.appendNew", .{}); + try self._insertNodeRelative(true, parent, child, .append, .{ // this opts has no meaning since we're passing `true` as the first // parameter, which indicates this comes from the parser, and has its // own special processing. Still, set it to be clear. 
diff --git a/src/browser/parser/Parser.zig b/src/browser/parser/Parser.zig index 44756aed..f4d58e85 100644 --- a/src/browser/parser/Parser.zig +++ b/src/browser/parser/Parser.zig @@ -23,6 +23,7 @@ const h5e = @import("html5ever.zig"); const Frame = @import("../Frame.zig"); const Node = @import("../webapi/Node.zig"); const Element = @import("../webapi/Element.zig"); +const CData = @import("../webapi/CData.zig"); pub const AttributeIterator = h5e.AttributeIterator; @@ -39,6 +40,19 @@ pub const ParsedNode = struct { data: ?*anyopaque, }; +// html5ever's tokenizer flushes the script-data character buffer on every '<' +// (script-data-less-than-sign-state transition), which produces a separate +// AppendText callback per chunk. Merging via String.concat in the previous +// implementation was O(N^2/chunk_size) on the page-lifetime arena, blowing +// memory on inline JS that contains embedded HTML strings (issue #2397). +// Instead, we accumulate same-parent chunks in this struct and commit a +// single allocation on flush. 
+const PendingText = struct { + parent: *Node, + text_node: *CData, + buf: std.ArrayListUnmanaged(u8) = .empty, +}; + const Parser = @This(); frame: *Frame, @@ -46,6 +60,7 @@ err: ?Error, container: ParsedNode, arena: Allocator, strings: std.StringHashMapUnmanaged(void), +pending_text: ?PendingText, pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser { return .{ @@ -57,6 +72,54 @@ pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser { .data = null, .node = node, }, + .pending_text = null, + }; +} + +fn flushPendingText(self: *Parser) !void { + var pt = self.pending_text orelse return; + self.pending_text = null; + defer pt.buf.deinit(self.arena); + pt.text_node._data = try lp.String.init( + self.frame.arena, + pt.buf.items, + .{ .dupe = true }, + ); +} + +fn appendTextChunk(self: *Parser, parent: *Node, txt: []const u8) !void { + if (self.pending_text) |*pt| { + if (pt.parent == parent and parent.lastChild() == pt.text_node.asNode()) { + try pt.buf.appendSlice(self.arena, txt); + return; + } + try self.flushPendingText(); + } + + if (parent.lastChild()) |sibling| { + if (sibling.is(CData.Text)) |tn| { + const cdata = tn._proto; + const existing = cdata.getData().str(); + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(self.arena); + try buf.ensureTotalCapacityPrecise(self.arena, existing.len + txt.len); + buf.appendSliceAssumeCapacity(existing); + buf.appendSliceAssumeCapacity(txt); + self.pending_text = .{ .parent = parent, .text_node = cdata, .buf = buf }; + return; + } + } + + const new_text = try self.frame.createTextNode(txt); + try self.frame.appendNew(parent, new_text); + + var buf: std.ArrayListUnmanaged(u8) = .empty; + errdefer buf.deinit(self.arena); + try buf.appendSlice(self.arena, txt); + self.pending_text = .{ + .parent = parent, + .text_node = new_text.is(CData.Text).?._proto, + .buf = buf, }; } @@ -101,6 +164,9 @@ pub fn parse(self: *Parser, html: []const u8) void { appendBeforeSiblingCallback, 
appendBasedOnParentNodeCallback, ); + self.flushPendingText() catch |err| { + if (self.err == null) self.err = .{ .err = err, .source = .append }; + }; } /// Parse HTML with encoding conversion. Converts from charset to UTF-8 before parsing. @@ -127,6 +193,9 @@ pub fn parseWithEncoding(self: *Parser, html: []const u8, charset: []const u8) v appendBeforeSiblingCallback, appendBasedOnParentNodeCallback, ); + self.flushPendingText() catch |err| { + if (self.err == null) self.err = .{ .err = err, .source = .append }; + }; } pub fn parseXML(self: *Parser, xml: []const u8) void { @@ -150,6 +219,9 @@ pub fn parseXML(self: *Parser, xml: []const u8) void { appendBeforeSiblingCallback, appendBasedOnParentNodeCallback, ); + self.flushPendingText() catch |err| { + if (self.err == null) self.err = .{ .err = err, .source = .append }; + }; } pub fn parseFragment(self: *Parser, html: []const u8) void { @@ -173,6 +245,9 @@ pub fn parseFragment(self: *Parser, html: []const u8) void { appendBeforeSiblingCallback, appendBasedOnParentNodeCallback, ); + self.flushPendingText() catch |err| { + if (self.err == null) self.err = .{ .err = err, .source = .append }; + }; } pub const Streaming = struct { @@ -233,8 +308,9 @@ pub const Streaming = struct { } } - pub fn done(self: *Streaming) void { + pub fn done(self: *Streaming) !void { h5e.html5ever_streaming_parser_finish(self.handle.?); + try self.parser.flushPendingText(); } }; @@ -252,6 +328,9 @@ fn popCallback(ctx: *anyopaque, node_ref: *anyopaque) callconv(.c) void { } fn _popCallback(self: *Parser, node: *Node) !void { + // Flush before any nodeComplete so Build.complete (and any custom-element + // callbacks reachable from it) observe the final text data. 
+ try self.flushPendingText(); try self.frame.nodeComplete(node); } @@ -340,7 +419,7 @@ fn _appendDoctypeToDocument(self: *Parser, name: []const u8, public_id: []const }); // Append it to the document - try frame.appendNew(self.container.node, .{ .node = doctype.asNode() }); + try frame.appendNew(self.container.node, doctype.asNode()); } fn addAttrsIfMissingCallback(ctx: *anyopaque, target_ref: *anyopaque, attributes: h5e.AttributeIterator) callconv(.c) void { @@ -402,6 +481,10 @@ fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) ! // child node is guaranteed not to belong to another parent switch (node_or_text.toUnion()) { .node => |cpn| { + // Inserting a non-text child terminates any pending text run; flush + // before the insertion so that connectedCallback (etc.) sees the + // final data on the preceding text sibling. + try self.flushPendingText(); const child = getNode(cpn); if (child._parent) |previous_parent| { // html5ever says this can't happen, but we might be screwing up @@ -414,9 +497,9 @@ fn _appendCallback(self: *Parser, parent: *Node, node_or_text: h5e.NodeOrText) ! } self.frame.removeNode(previous_parent, child, .{ .will_be_reconnected = parent.isConnected() }); } - try self.frame.appendNew(parent, .{ .node = child }); + try self.frame.appendNew(parent, child); }, - .text => |txt| try self.frame.appendNew(parent, .{ .text = txt }), + .text => |txt| try self.appendTextChunk(parent, txt), } } @@ -448,6 +531,10 @@ fn appendBeforeSiblingCallback(ctx: *anyopaque, sibling_ref: *anyopaque, node_or }; } fn _appendBeforeSiblingCallback(self: *Parser, sibling: *Node, node_or_text: h5e.NodeOrText) !void { + // Foster parenting / before-sibling insertions interrupt any pending text + // run (the new node lands at a different position from the pending text's + // tail). Flush before reading the parent's structure. 
+ try self.flushPendingText(); const parent = sibling.parentNode() orelse return error.NoParent; const node: *Node = switch (node_or_text.toUnion()) { .node => |cpn| blk: { diff --git a/src/browser/tests/cdata/raw_text_chunked.html b/src/browser/tests/cdata/raw_text_chunked.html new file mode 100644 index 00000000..021b83c8 --- /dev/null +++ b/src/browser/tests/cdata/raw_text_chunked.html @@ -0,0 +1,71 @@ + + + + + + + + + + + +A page <with> many <tags> in <the> title + + diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 9e40d793..4e305cdd 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -837,7 +837,7 @@ pub fn close(self: *Document, frame: *Frame) !void { // done() calls html5ever_streaming_parser_finish which frees the parser // We must NOT call deinit() after done() as that would be a double-free - self._script_created_parser.?.done(); + try self._script_created_parser.?.done(); // Just null out the handle since done() already freed it self._script_created_parser.?.handle = null; self._script_created_parser = null; From 60219e69e917467cd0ac658223b5c4cf438524bf Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Sat, 9 May 2026 21:48:46 +0200 Subject: [PATCH 15/34] parser: address review findings for raw-text merge - Flush pending text in _removeFromParentCallback and _reparentChildrenCallback. Without these, html5ever can detach or reparent the pending text node mid-parse and a later flush would write accumulated bytes onto a node no longer in the tree (or to the wrong parent). - Streaming.done now nulls self.handle right after html5ever_finish, before flushPendingText. If the flush errors the handle is already cleared, so dropping the Streaming can't double-free. - Document.close uses a defer to clear _script_created_parser even when done() returns an error. 
Document.write's parser-panic path now attempts a final flush before dropping the streaming parser, so whatever bytes html5ever fed before the panic still land on their text node. - raw_text_chunked.html: larger raw-text bodies and exact byte counts per element. Catches future deferred-merge regressions that drop or duplicate a chunk; the memory bound itself is verified out-of-band via the live reproducer in the PR description. Refs #2397 --- src/browser/parser/Parser.zig | 20 ++- src/browser/tests/cdata/raw_text_chunked.html | 123 +++++++++++++----- src/browser/webapi/Document.zig | 18 ++- 3 files changed, 120 insertions(+), 41 deletions(-) diff --git a/src/browser/parser/Parser.zig b/src/browser/parser/Parser.zig index f4d58e85..d7fd3098 100644 --- a/src/browser/parser/Parser.zig +++ b/src/browser/parser/Parser.zig @@ -76,7 +76,7 @@ pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser { }; } -fn flushPendingText(self: *Parser) !void { +pub fn flushPendingText(self: *Parser) !void { var pt = self.pending_text orelse return; self.pending_text = null; defer pt.buf.deinit(self.arena); @@ -309,7 +309,14 @@ pub const Streaming = struct { } pub fn done(self: *Streaming) !void { - h5e.html5ever_streaming_parser_finish(self.handle.?); + // Null the handle before finish() so a flushPendingText failure can't + // leave a finished-but-still-referenced handle behind for deinit to + // double-free. flushPendingText doesn't touch the html5ever handle — + // it only reads pending_text and writes to a text node's _data — so + // running it after finish is safe. 
+ const handle = self.handle.?; + self.handle = null; + h5e.html5ever_streaming_parser_finish(handle); try self.parser.flushPendingText(); } }; @@ -510,6 +517,11 @@ fn removeFromParentCallback(ctx: *anyopaque, target_ref: *anyopaque) callconv(.c }; } fn _removeFromParentCallback(self: *Parser, node: *Node) !void { + // Removing a node mid-parse can detach the pending text node or its + // parent; either way the pending invariant breaks. Flush first so the + // accumulated bytes land on a still-attached text node (and pending_text + // is cleared before any subsequent chunk targets a fresh node). + try self.flushPendingText(); const parent = node.parentNode() orelse return; _ = try parent.removeChild(node, self.frame); } @@ -521,6 +533,10 @@ fn reparentChildrenCallback(ctx: *anyopaque, node_ref: *anyopaque, new_parent_re }; } fn _reparentChildrenCallback(self: *Parser, node: *Node, new_parent: *Node) !void { + // Reparenting can move the pending text node out from under us — the + // node's _parent changes but pending_text.parent does not. Flush so the + // accumulator commits before the tree is rearranged. + try self.flushPendingText(); try self.frame.appendAllChildren(node, new_parent); } diff --git a/src/browser/tests/cdata/raw_text_chunked.html b/src/browser/tests/cdata/raw_text_chunked.html index 021b83c8..8a5503a4 100644 --- a/src/browser/tests/cdata/raw_text_chunked.html +++ b/src/browser/tests/cdata/raw_text_chunked.html @@ -6,66 +6,123 @@ When the html5ever tokenizer is in script-data / rawtext / rcdata state and encounters '<', it flushes the pending character buffer and re-enters via a - fresh AppendText callback. Before the fix, every chunk re-allocated and - re-copied the growing text on the page-lifetime arena (O(N^2)). Real pages - with embedded HTML strings inside JS literals blew memory to gigabytes. + fresh AppendText callback. Pre-fix, every chunk re-allocated and re-copied + the growing text on the page-lifetime arena (O(N^2)). 
Real pages with + embedded HTML strings inside JS literals blew memory to gigabytes. - After the fix, raw-text-mode element bodies must still parse to exactly ONE - text node whose data is byte-identical to the source body. + This test guards correctness of the deferred-merge state machine in + Parser.appendTextChunk / flushPendingText: a stress case big enough to + cross many chunk boundaries, and exact byte-length assertions so that any + future regression that drops or duplicates a chunk is caught. The memory + bound itself is measured externally via the live reproducer documented in + the PR description (apple.com US iPhone page: 3.5 GB → 125 MB). --> + - + + -A page <with> many <tags> in <the> title +A page <with> many <tags> in <the> title for testing diff --git a/src/browser/webapi/Document.zig b/src/browser/webapi/Document.zig index 4e305cdd..dd29a6ce 100644 --- a/src/browser/webapi/Document.zig +++ b/src/browser/webapi/Document.zig @@ -706,7 +706,13 @@ fn writeInternal(self: *Document, text: []const []const u8, append_newline: bool if (self._script_created_parser) |*parser| { parser.read(html) catch |err| { log.warn(.dom, "document.write parser error", .{ .err = err }); - // was already closed + // html5ever's handle was destroyed inside read(), but the + // pending text buffer (if any) still wants to land on its + // text node's _data — flushPendingText doesn't depend on + // the handle, so attempt a final flush before dropping. + parser.parser.flushPendingText() catch |flush_err| { + log.warn(.dom, "flush after parser panic", .{ .err = flush_err }); + }; self._script_created_parser = null; }; } @@ -835,12 +841,12 @@ pub fn close(self: *Document, frame: *Frame) !void { return; } - // done() calls html5ever_streaming_parser_finish which frees the parser - // We must NOT call deinit() after done() as that would be a double-free + // done() finishes html5ever's handle and runs the final flushPendingText. 
+ // Even if flushPendingText errors, the handle is already finished and we + // must not retain the Streaming — defer so the error path also drops it. + // (Streaming.done nulls its own handle, so dropping the struct is safe.) + defer self._script_created_parser = null; try self._script_created_parser.?.done(); - // Just null out the handle since done() already freed it - self._script_created_parser.?.handle = null; - self._script_created_parser = null; frame.documentIsComplete(); } From 55a42fe5c6be7e0ace460af09d1f5fec18bde2ed Mon Sep 17 00:00:00 2001 From: Halil Durak Date: Mon, 11 May 2026 14:35:30 +0300 Subject: [PATCH 16/34] `HTMLLinkElement`: add `media` getter/setter --- src/browser/webapi/element/html/Link.zig | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/browser/webapi/element/html/Link.zig b/src/browser/webapi/element/html/Link.zig index 8f561355..c8bcafa1 100644 --- a/src/browser/webapi/element/html/Link.zig +++ b/src/browser/webapi/element/html/Link.zig @@ -71,6 +71,14 @@ pub fn setAs(self: *Link, value: []const u8, frame: *Frame) !void { return self.asElement().setAttributeSafe(comptime .wrap("as"), .wrap(value), frame); } +pub fn getMedia(self: *Link) []const u8 { + return self.asElement().getAttributeSafe(comptime .wrap("media")) orelse return ""; +} + +pub fn setMedia(self: *Link, value: []const u8, frame: *Frame) !void { + return self.asElement().setAttributeSafe(comptime .wrap("media"), .wrap(value), frame); +} + pub fn getCrossOrigin(self: *const Link) ?[]const u8 { return self.asConstElement().getAttributeSafe(comptime .wrap("crossOrigin")); } @@ -120,6 +128,7 @@ pub const JsApi = struct { pub const as = bridge.accessor(Link.getAs, Link.setAs, .{}); pub const rel = bridge.accessor(Link.getRel, Link.setRel, .{}); + pub const media = bridge.accessor(Link.getMedia, Link.setMedia, .{}); pub const href = bridge.accessor(Link.getHref, Link.setHref, .{}); pub const crossOrigin = bridge.accessor(Link.getCrossOrigin, 
Link.setCrossOrigin, .{}); pub const relList = bridge.accessor(_getRelList, null, .{ .null_as_undefined = true }); From 20c7bc14d2fa74957f53ff75a707d50f2f1444c5 Mon Sep 17 00:00:00 2001 From: Halil Durak Date: Mon, 11 May 2026 14:35:41 +0300 Subject: [PATCH 17/34] `HTMLLinkElement`: update tests --- src/browser/tests/element/html/link.html | 31 ++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/browser/tests/element/html/link.html b/src/browser/tests/element/html/link.html index 57d9e28b..8a5f3638 100644 --- a/src/browser/tests/element/html/link.html +++ b/src/browser/tests/element/html/link.html @@ -1,6 +1,31 @@ + + + + + - + ".len * inject_scripts.len; + for (inject_scripts) |script| total += script.len; + break :calc total; + }; + + const inject_chunk = try html.arena.alloc(u8, inject_chunk_size); + defer html.arena.free(inject_chunk); + + // Fill `inject_chunk`. + var from: usize = 0; + for (inject_scripts) |script| { + @memcpy(inject_chunk[from..][0..8], ""); + from += "".len; + } + + // Insert at the beginning of the . + try html.buffer.insertSlice(html.arena, head_end_index, inject_chunk); + } + + break :blk html.buffer.items; + }; if (std.mem.eql(u8, self.charset, "UTF-8")) { parser.parse(raw_html); diff --git a/src/browser/Runner.zig b/src/browser/Runner.zig index a4028f90..009235bb 100644 --- a/src/browser/Runner.zig +++ b/src/browser/Runner.zig @@ -294,22 +294,6 @@ pub fn waitForSelector(self: *Runner, selector: [:0]const u8, timeout_ms: u32) ! } } -pub fn injectScripts(runner: *Runner, scripts: std.ArrayList([:0]const u8)) !void { - const frame = runner.frame; - - for (scripts.items) |source| { - // Create ".len * inject_scripts.len; - for (inject_scripts) |script| total += script.len; - break :calc total; - }; - - const inject_chunk = try html.arena.alloc(u8, inject_chunk_size); - defer html.arena.free(inject_chunk); - - // Fill `inject_chunk`. 
- var from: usize = 0; - for (inject_scripts) |script| { - @memcpy(inject_chunk[from..][0..8], ""); - from += "".len; - } - - // Insert at the beginning of the . - try html.buffer.insertSlice(html.arena, head_end_index, inject_chunk); - } - - break :blk html.buffer.items; - }; + const raw_html = html.buffer.items; if (std.mem.eql(u8, self.charset, "UTF-8")) { parser.parse(raw_html); @@ -2188,12 +2137,36 @@ pub fn createElementNS(self: *Frame, namespace: Element.Namespace, name: []const attribute_iterator, .{ ._proto = undefined }, ), - asUint("head") => return self.createHtmlElementT( - Element.Html.Head, - namespace, - attribute_iterator, - .{ ._proto = undefined }, - ), + asUint("head") => { + // Inject user-provided scripts. + const inject_scripts = self._session.inject_scripts; + const should_inject_scripts = from_parser and self._parse_mode == .document and inject_scripts.len > 0; + + if (should_inject_scripts) { + var ls: JS.Local.Scope = undefined; + self.js.localScope(&ls); + defer ls.deinit(); + + var try_catch: JS.TryCatch = undefined; + try_catch.init(&ls.local); + defer try_catch.deinit(); + + for (inject_scripts) |inject_script| { + ls.local.eval(inject_script, "inject_script") catch |err| { + const caught = try_catch.caughtOrError(self.call_arena, err); + log.err(.app, "inject script error", .{ .err = caught }); + return error.InjectScriptError; + }; + } + } + + return self.createHtmlElementT( + Element.Html.Head, + namespace, + attribute_iterator, + .{ ._proto = undefined }, + ); + }, asUint("body") => return self.createHtmlElementT( Element.Html.Body, namespace, From 19401dc950f1861796fb8b3850d86ce769c9e679 Mon Sep 17 00:00:00 2001 From: Halil Durak Date: Thu, 7 May 2026 14:29:31 +0300 Subject: [PATCH 28/34] `Config`: update `--inject-script` documentation --- src/Config.zig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Config.zig b/src/Config.zig index bcef5356..7d476be8 100644 --- a/src/Config.zig +++ b/src/Config.zig 
@@ -667,9 +667,8 @@ pub fn printUsageAndExit(self: *const Config, success: bool) void { \\--wait-script-file \\ Like --wait-script, but reads the script from a file. \\ - \\--inject-script JavaScript to inject as an inline + + + + + diff --git a/src/testing.zig b/src/testing.zig index 549d1349..ef8693b9 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -338,12 +338,20 @@ pub var test_notification: *Notification = undefined; pub var test_session: *Session = undefined; const WEB_API_TEST_ROOT = "src/browser/tests/"; -const HtmlRunnerOpts = struct {}; +const HtmlRunnerOpts = struct { + inject_script: ?[]const u8 = null, +}; pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void { - _ = opts; defer reset(); + var inject_scripts: [1][]const u8 = undefined; + if (opts.inject_script) |script| { + inject_scripts[0] = script; + test_session.inject_scripts = inject_scripts[0..1]; + } + defer test_session.inject_scripts = &.{}; + const root = try std.fs.path.joinZ(arena_allocator, &.{ WEB_API_TEST_ROOT, path }); const stat = std.fs.cwd().statFile(root) catch |err| { std.debug.print("Failed to stat file: '{s}'", .{root}); From a470f6b68662fd88fbdb41011a89b89a3a5e3dae Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Mon, 11 May 2026 14:36:40 +0200 Subject: [PATCH 31/34] parser: lift merge buffer onto Parser and lazy-buffer single-chunk runs Addresses follow-up review from karlseguin on #2406. The pending-text merge buffer is now a single ArrayList on the Parser, reused across runs via clearRetainingCapacity. In the streaming-parser case (Document.write), parser.arena is the page-lifetime frame.arena, so the previous per-PendingText buf.deinit was a no-op and growth artifacts accumulated. With one shared buffer, total dead memory is bounded to one peak-run-sized allocation regardless of how many text runs the parse contains. Single-chunk text runs no longer touch the buffer. 
The first chunk lives only on CData._data via createTextNode; the buffer is seeded from text_node.getData().str() only when a second chunk arrives at the same parent and last_child. flushPendingText is a no-op when the buffer is empty. Restores the common-case allocation count to 1 (matching main), vs 3 in the previous PR head. Benchmark deltas (ReleaseFast, peak RSS, 5-run median): - 10K-paragraph synthetic page: 39 MB -> 37 MB - 20K single-chunk script synthetic: 56 MB -> 54 MB - 100 x 48 KB multi-chunk scripts: within noise (~46 MB) - apple.com US iPhone live page: within JS-driven noise (~92 MB) Refs #2397 --- src/browser/parser/Parser.zig | 55 +++++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 19 deletions(-) diff --git a/src/browser/parser/Parser.zig b/src/browser/parser/Parser.zig index d7fd3098..8584132f 100644 --- a/src/browser/parser/Parser.zig +++ b/src/browser/parser/Parser.zig @@ -45,12 +45,11 @@ pub const ParsedNode = struct { // AppendText callback per chunk. Merging via String.concat in the previous // implementation was O(N^2/chunk_size) on the page-lifetime arena, blowing // memory on inline JS that contains embedded HTML strings (issue #2397). -// Instead, we accumulate same-parent chunks in this struct and commit a -// single allocation on flush. +// Instead, we keep a single Parser-level buf and accumulate same-parent +// chunks into it, committing once on flush. const PendingText = struct { parent: *Node, text_node: *CData, - buf: std.ArrayListUnmanaged(u8) = .empty, }; const Parser = @This(); @@ -61,6 +60,16 @@ container: ParsedNode, arena: Allocator, strings: std.StringHashMapUnmanaged(void), pending_text: ?PendingText, +// One buffer reused across every text run in this parser. clearRetainingCapacity +// on flush keeps the largest capacity ever needed, so total dead memory on the +// parser arena is bounded to one peak-run-sized allocation regardless of how +// many text runs the parse contains. 
Matters for Streaming, whose arena is the +// page-lifetime frame.arena (individual frees are no-ops there). +// +// Single-chunk text runs leave this buf empty: the chunk lives only in +// CData._data via createTextNode. The buf is seeded from _data.str() on the +// second chunk of a run, so the common case stays at one copy. +buf: std.ArrayListUnmanaged(u8), pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser { return .{ @@ -73,24 +82,34 @@ pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser { .node = node, }, .pending_text = null, + .buf = .empty, }; } pub fn flushPendingText(self: *Parser) !void { - var pt = self.pending_text orelse return; + const pt = self.pending_text orelse return; self.pending_text = null; - defer pt.buf.deinit(self.arena); + // Single-chunk run: data already lives on _data via createTextNode. + if (self.buf.items.len == 0) return; + defer self.buf.clearRetainingCapacity(); pt.text_node._data = try lp.String.init( self.frame.arena, - pt.buf.items, + self.buf.items, .{ .dupe = true }, ); } fn appendTextChunk(self: *Parser, parent: *Node, txt: []const u8) !void { - if (self.pending_text) |*pt| { + if (self.pending_text) |pt| { if (pt.parent == parent and parent.lastChild() == pt.text_node.asNode()) { - try pt.buf.appendSlice(self.arena, txt); + // Second+ chunk of the same run. If buf is still empty, promote + // from the single-chunk fast path by seeding from _data first. + if (self.buf.items.len == 0) { + const existing = pt.text_node.getData().str(); + try self.buf.ensureTotalCapacity(self.arena, existing.len + txt.len); + self.buf.appendSliceAssumeCapacity(existing); + } + try self.buf.appendSlice(self.arena, txt); return; } try self.flushPendingText(); @@ -98,28 +117,26 @@ fn appendTextChunk(self: *Parser, parent: *Node, txt: []const u8) !void { if (parent.lastChild()) |sibling| { if (sibling.is(CData.Text)) |tn| { + // Existing text sibling without a matching pending_text. 
Seed the + // buf from its _data and register pending so subsequent chunks + // accumulate cheaply. const cdata = tn._proto; const existing = cdata.getData().str(); - var buf: std.ArrayListUnmanaged(u8) = .empty; - errdefer buf.deinit(self.arena); - try buf.ensureTotalCapacityPrecise(self.arena, existing.len + txt.len); - buf.appendSliceAssumeCapacity(existing); - buf.appendSliceAssumeCapacity(txt); - self.pending_text = .{ .parent = parent, .text_node = cdata, .buf = buf }; + try self.buf.ensureTotalCapacity(self.arena, existing.len + txt.len); + self.buf.appendSliceAssumeCapacity(existing); + self.buf.appendSliceAssumeCapacity(txt); + self.pending_text = .{ .parent = parent, .text_node = cdata }; return; } } + // Fresh text run: the first chunk lives on _data only. buf stays empty + // until (and unless) a second chunk arrives. const new_text = try self.frame.createTextNode(txt); try self.frame.appendNew(parent, new_text); - - var buf: std.ArrayListUnmanaged(u8) = .empty; - errdefer buf.deinit(self.arena); - try buf.appendSlice(self.arena, txt); self.pending_text = .{ .parent = parent, .text_node = new_text.is(CData.Text).?._proto, - .buf = buf, }; } From 258003ca9000ab0639576eca6e886f391331e467 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Mon, 11 May 2026 20:42:11 +0800 Subject: [PATCH 32/34] ArrayListUnmanaged (deprecated name) -> ArrayList --- src/browser/parser/Parser.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/browser/parser/Parser.zig b/src/browser/parser/Parser.zig index 8584132f..fe6e1219 100644 --- a/src/browser/parser/Parser.zig +++ b/src/browser/parser/Parser.zig @@ -69,7 +69,7 @@ pending_text: ?PendingText, // Single-chunk text runs leave this buf empty: the chunk lives only in // CData._data via createTextNode. The buf is seeded from _data.str() on the // second chunk of a run, so the common case stays at one copy. 
-buf: std.ArrayListUnmanaged(u8), +buf: std.ArrayList(u8), pub fn init(arena: Allocator, node: *Node, frame: *Frame) Parser { return .{ From 082994c3317da42cff3f9057ace54d17193c3d57 Mon Sep 17 00:00:00 2001 From: Karl Seguin Date: Mon, 11 May 2026 16:06:35 +0800 Subject: [PATCH 33/34] Allow HTML Tests to set a timeout Change worker timeout to 8 seconds. This test can be slow on a slow CI with TSAN enabled. --- src/browser/webapi/Worker.zig | 5 ++++- src/testing.zig | 9 +++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/browser/webapi/Worker.zig b/src/browser/webapi/Worker.zig index 67d24526..5d571fa6 100644 --- a/src/browser/webapi/Worker.zig +++ b/src/browser/webapi/Worker.zig @@ -416,5 +416,8 @@ pub const JsApi = struct { const testing = @import("../../testing.zig"); test "WebApi: Worker" { - try testing.htmlRunner("worker", .{}); + // Worker tests chain a worker-script fetch with a dynamic-import fetch + // and a cross-context postMessage. The default 2 s assertion budget can + // blow up on TSAN CI; give it more room.
+ try testing.htmlRunner("worker", .{ .timeout_ms = 8000 }); } diff --git a/src/testing.zig b/src/testing.zig index ef8693b9..9e59ebc0 100644 --- a/src/testing.zig +++ b/src/testing.zig @@ -339,6 +339,7 @@ pub var test_session: *Session = undefined; const WEB_API_TEST_ROOT = "src/browser/tests/"; const HtmlRunnerOpts = struct { + timeout_ms: u32 = 2000, inject_script: ?[]const u8 = null, }; @@ -364,7 +365,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void { return; } try @import("root").subtest(root); - try runWebApiTest(root); + try runWebApiTest(root, opts.timeout_ms); }, .directory => { var dir = try std.fs.cwd().openDir(root, .{ @@ -390,7 +391,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void { const full_path = try std.fs.path.joinZ(arena_allocator, &.{ root, entry.name }); try @import("root").subtest(entry.name); - try runWebApiTest(full_path); + try runWebApiTest(full_path, opts.timeout_ms); } }, else => |kind| { @@ -400,7 +401,7 @@ pub fn htmlRunner(comptime path: []const u8, opts: HtmlRunnerOpts) !void { } } -fn runWebApiTest(test_file: [:0]const u8) !void { +fn runWebApiTest(test_file: [:0]const u8, timeout_ms: u32) !void { const frame = try test_session.createPage(); defer test_session.removePage(); @@ -426,7 +427,7 @@ fn runWebApiTest(test_file: [:0]const u8) !void { var runner = try test_session.runner(.{}); try runner.wait(.{ .ms = 2000, .until = .load }); - var wait_ms: u32 = 2000; + var wait_ms: u32 = timeout_ms; var timer = try std.time.Timer.start(); while (true) { var try_catch: js.TryCatch = undefined; From 105f5028c8e5ac5e2c90a44f9ae7e7358d83e05c Mon Sep 17 00:00:00 2001 From: Muki Kiboigo Date: Mon, 11 May 2026 06:50:03 -0700 Subject: [PATCH 34/34] update nix flake --- flake.lock | 49 ++++++++++++++++--------------------------------- flake.nix | 2 +- 2 files changed, 17 insertions(+), 34 deletions(-) diff --git a/flake.lock b/flake.lock index dcf837fa..01cb7067 100644 --- a/flake.lock +++ 
b/flake.lock @@ -8,11 +8,11 @@ "rust-analyzer-src": "rust-analyzer-src" }, "locked": { - "lastModified": 1770708269, - "narHash": "sha256-OnZW86app7hHJJoB5lC9GNXY5QBBIESJB+sIdwEyld0=", + "lastModified": 1778493576, + "narHash": "sha256-/vvNyF8C2tNTkxtffGUQbcTJvf72cRw3qo8cyBh33pM=", "owner": "nix-community", "repo": "fenix", - "rev": "6b5325a017a9a9fe7e6252ccac3680cc7181cd63", + "rev": "5bf88a04d8678c7334f2f5072975f3b2cb0fe1ba", "type": "github" }, "original": { @@ -55,24 +55,6 @@ "type": "github" } }, - "flake-utils_2": { - "inputs": { - "systems": "systems_2" - }, - "locked": { - "lastModified": 1705309234, - "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "gitignore": { "inputs": { "nixpkgs": [ @@ -96,16 +78,16 @@ }, "nixpkgs": { "locked": { - "lastModified": 1768649915, - "narHash": "sha256-jc21hKogFnxU7KXSVTRmxC7u5D4RHwm9BAvDf5/Z1Uo=", + "lastModified": 1778003029, + "narHash": "sha256-q/nkKLDtHIyLjZpKhWk3cSK5IYsFqtMd6UtXF3ddjgA=", "owner": "nixos", "repo": "nixpkgs", - "rev": "3e3f3c7f9977dc123c23ee21e8085ed63daf8c37", + "rev": "0c88e1f2bdb93d5999019e99cb0e61e1fe2af4c5", "type": "github" }, "original": { "owner": "nixos", - "ref": "release-25.05", + "ref": "nixos-25.11", "repo": "nixpkgs", "type": "github" } @@ -122,11 +104,11 @@ "rust-analyzer-src": { "flake": false, "locked": { - "lastModified": 1770668050, - "narHash": "sha256-Q05yaIZtQrBKHpyWaPmyJmDRj0lojnVf8nUFE0vydcY=", + "lastModified": 1778424672, + "narHash": "sha256-v/CZ9tJT+ulSe3ZmjuG3lWABwOvITbT7EqF/2NAl3Hs=", "owner": "rust-lang", "repo": "rust-analyzer", - "rev": "9efc1f709f3c8134c3acac5d3592a8e4c184a0c6", + "rev": "e266f5cab8f6525d0bc2ddccc0006418c534b5e6", "type": "github" }, "original": { @@ -152,6 +134,7 @@ } }, "systems_2": { + "flake": false, "locked": { 
"lastModified": 1681028828, "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", @@ -169,17 +152,17 @@ "zigPkgs": { "inputs": { "flake-compat": "flake-compat", - "flake-utils": "flake-utils_2", "nixpkgs": [ "nixpkgs" - ] + ], + "systems": "systems_2" }, "locked": { - "lastModified": 1770598090, - "narHash": "sha256-k+82IDgTd9o5sxHIqGlvfwseKln3Ejx1edGtDltuPXo=", + "lastModified": 1778375309, + "narHash": "sha256-3+5C2LDX1lmupM6ktG6i50BRvRnN32WLinpxqa2g+HQ=", "owner": "mitchellh", "repo": "zig-overlay", - "rev": "142495696982c88edddc8e17e4da90d8164acadf", + "rev": "057bcab6a8e6a3a85e9293e150d35c63404e8fca", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index d306ae09..ffa96a97 100644 --- a/flake.nix +++ b/flake.nix @@ -2,7 +2,7 @@ description = "headless browser designed for AI and automation"; inputs = { - nixpkgs.url = "github:nixos/nixpkgs/release-25.05"; + nixpkgs.url = "github:nixos/nixpkgs/nixos-25.11"; zigPkgs.url = "github:mitchellh/zig-overlay"; zigPkgs.inputs.nixpkgs.follows = "nixpkgs";