mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
Merge pull request #2305 from navidemad/feat/xpath-1.0-evaluator
xpath: implement XPath 1.0 (Document.evaluate, XPathResult, DOM.performSearch)
This commit is contained in:
@@ -935,6 +935,9 @@ pub const PageJsApis = flattenTypes(&.{
|
||||
@import("../webapi/CryptoKey.zig"),
|
||||
@import("../webapi/Selection.zig"),
|
||||
@import("../webapi/ImageData.zig"),
|
||||
@import("../webapi/XPathResult.zig"),
|
||||
@import("../webapi/XPathExpression.zig"),
|
||||
@import("../webapi/XPathEvaluator.zig"),
|
||||
});
|
||||
|
||||
// APIs available on Worker context globals (constructors like URL, Headers, etc.)
|
||||
|
||||
8
src/browser/tests/cdp/perform_search_xpath.html
Normal file
8
src/browser/tests/cdp/perform_search_xpath.html
Normal file
@@ -0,0 +1,8 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<div id=outer>
|
||||
<p>1</p>
|
||||
<p>2</p>
|
||||
</div>
|
||||
<p>3</p>
|
||||
</body>
|
||||
123
src/browser/tests/xpath/document_evaluate.html
Normal file
123
src/browser/tests/xpath/document_evaluate.html
Normal file
@@ -0,0 +1,123 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<script src="../testing.js"></script>
|
||||
<h1 id=title>Hello</h1>
|
||||
<div class=group>
|
||||
<p id=p1>First</p>
|
||||
<p id=p2>Second</p>
|
||||
<p id=p3>Third</p>
|
||||
</div>
|
||||
<span id=span1 data-x="42">x</span>
|
||||
</body>
|
||||
|
||||
<script id=snapshot_basic>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(true, r instanceof XPathResult);
|
||||
testing.expectEqual(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
|
||||
testing.expectEqual(3, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
testing.expectEqual('p3', r.snapshotItem(2).id);
|
||||
testing.expectEqual(null, r.snapshotItem(3));
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=default_context>
|
||||
{
|
||||
const r1 = document.evaluate("//p", null, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r1.snapshotLength);
|
||||
const r2 = document.evaluate("//p", undefined, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r2.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=first_ordered_node>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(XPathResult.FIRST_ORDERED_NODE_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.singleNodeValue.id);
|
||||
|
||||
const empty = document.evaluate("//nope", document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(null, empty.singleNodeValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=number_type>
|
||||
{
|
||||
const r = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.NUMBER_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, r.resultType);
|
||||
testing.expectEqual(3, r.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=string_type>
|
||||
{
|
||||
const r = document.evaluate("string(//h1)", document, null,
|
||||
XPathResult.STRING_TYPE, null);
|
||||
testing.expectEqual(XPathResult.STRING_TYPE, r.resultType);
|
||||
testing.expectEqual('Hello', r.stringValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=boolean_type>
|
||||
{
|
||||
const r = document.evaluate("count(//p) > 0", document, null,
|
||||
XPathResult.BOOLEAN_TYPE, null);
|
||||
testing.expectEqual(XPathResult.BOOLEAN_TYPE, r.resultType);
|
||||
testing.expectEqual(true, r.booleanValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=any_type>
|
||||
{
|
||||
const ns = document.evaluate("//p", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, ns.resultType);
|
||||
testing.expectEqual('p1', ns.iterateNext().id);
|
||||
|
||||
const num = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, num.resultType);
|
||||
testing.expectEqual(3, num.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=context_node_scoping>
|
||||
{
|
||||
const div = document.querySelector('div.group');
|
||||
const r = document.evaluate("./p", div, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=create_expression>
|
||||
{
|
||||
const expr = document.createExpression("//p", null);
|
||||
testing.expectEqual(true, expr instanceof XPathExpression);
|
||||
const r = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(3, r.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=create_ns_resolver>
|
||||
{
|
||||
const resolver = document.createNSResolver(document);
|
||||
testing.expectEqual(document, resolver);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=attribute_axis>
|
||||
{
|
||||
const r = document.evaluate("//span/@data-x", document, null,
|
||||
XPathResult.STRING_TYPE, null);
|
||||
testing.expectEqual('42', r.stringValue);
|
||||
}
|
||||
</script>
|
||||
201
src/browser/tests/xpath/xpath_conformance.html
Normal file
201
src/browser/tests/xpath/xpath_conformance.html
Normal file
@@ -0,0 +1,201 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>XPath conformance</title>
|
||||
<script src="../testing.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<h1 id="heading" class="primary">Hello World</h1>
|
||||
<p id="p1" lang="en" data-x="1">First paragraph with <em>emphasis</em>.</p>
|
||||
<p id="p2" class="note">Second paragraph.</p>
|
||||
<ul id="list">
|
||||
<li class="item odd">Item 1</li>
|
||||
<li class="item even">Item 2</li>
|
||||
<li class="item odd">Item 3</li>
|
||||
<li class="item even">Item 4</li>
|
||||
<li class="item odd">Item 5</li>
|
||||
</ul>
|
||||
<table id="t">
|
||||
<thead><tr><th>Name</th><th>Age</th></tr></thead>
|
||||
<tbody>
|
||||
<tr class="r"><td>Alice</td><td>30</td></tr>
|
||||
<tr class="r"><td>Bob</td><td>25</td></tr>
|
||||
<tr class="r"><td>Carol</td><td>40</td></tr>
|
||||
</tbody>
|
||||
</table>
|
||||
<div id="container">
|
||||
<section id="s1"><span>A</span><span>B</span></section>
|
||||
<section id="s2"><a href="/foo" id="link1">Click me</a></section>
|
||||
<section id="s3"><a href="/bar" id="link2">Other link</a></section>
|
||||
</div>
|
||||
<form id="form">
|
||||
<label for="name">Name</label>
|
||||
<input id="name" type="text" name="name" value="">
|
||||
<input id="email" type="email" name="email" value="">
|
||||
<input id="hidden" type="hidden" name="csrf" value="x">
|
||||
<input id="checkbox" type="checkbox" name="agree">
|
||||
<button id="btn" type="submit">Submit</button>
|
||||
</form>
|
||||
<!-- a comment node -->
|
||||
<div id="multi-class" class="alpha beta gamma"></div>
|
||||
<article id="art">
|
||||
<p>One</p>
|
||||
<p>Two</p>
|
||||
<p>Three</p>
|
||||
</article>
|
||||
|
||||
<script id=conformance_battery>
|
||||
{
|
||||
// XPath 1.0 conformance battery. Result counts derive from the body
|
||||
// fixture above; keep the two in sync.
|
||||
const cases = [
|
||||
// Absolute paths
|
||||
["/html", 1, "absolute root child"],
|
||||
["/html/body", 1, "/html/body absolute"],
|
||||
["/", 1, "root only"],
|
||||
|
||||
// Descendant abbreviations
|
||||
["//h1", 1, "// descendant"],
|
||||
["//ul/li", 5, "/ child"],
|
||||
["//ul//li", 5, "// nested descendant"],
|
||||
[".", 1, "self ."],
|
||||
[".//li", 5, "context-rel descendant"],
|
||||
|
||||
// Wildcards
|
||||
["//section/*", 4, "//section/* (2 spans + 2 anchors)"],
|
||||
["//*[@id='heading']", 1, "//*[@id]"],
|
||||
|
||||
// Axes
|
||||
["//li[1]/following-sibling::li", 4, "following-sibling"],
|
||||
["//li[5]/preceding-sibling::li", 4, "preceding-sibling"],
|
||||
["//li/parent::ul", 1, "parent::"],
|
||||
["//li/ancestor::body", 1, "ancestor::body"],
|
||||
["//li/ancestor-or-self::body", 1, "ancestor-or-self::"],
|
||||
["//li[3]/preceding::li", 2, "preceding axis"],
|
||||
["//li[1]/following::li", 4, "following axis"],
|
||||
["//ul/descendant::li", 5, "descendant axis"],
|
||||
["//ul/descendant-or-self::li", 5, "descendant-or-self::li"],
|
||||
["//section[1]/child::span", 2, "child:: explicit"],
|
||||
["//*[@id='heading']/self::h1", 1, "self:: type guard"],
|
||||
|
||||
// Attribute axis
|
||||
["//a[1]/attribute::href", 2, "attribute::href"],
|
||||
["//a[1]/@*", 4, "@* (2 anchors x 2 attrs)"],
|
||||
|
||||
// Position predicates
|
||||
["//li[1]", 1, "[1]"],
|
||||
["//li[last()]", 1, "[last()]"],
|
||||
["//li[last() - 1]", 1, "[last() - 1]"],
|
||||
["//li[position() = 1]", 1, "explicit position()"],
|
||||
["//li[position() > 2]", 3, "position > 2"],
|
||||
["//li[position() mod 2 = 1]", 3, "position mod 2 = 1 (odd)"],
|
||||
["(//li)[1]", 1, "(//li)[1] filter on group"],
|
||||
["(//section)[2]", 1, "(//section)[2] grouped"],
|
||||
|
||||
// Reverse-axis proximity-order predicates
|
||||
["//li[3]/preceding-sibling::li[1]", 1, "reverse axis [1] = nearest"],
|
||||
["//li[5]/ancestor::*[1]", 1, "ancestor::*[1] = parent ul"],
|
||||
|
||||
// Multi-predicate / chained
|
||||
["//li[contains(concat(' ', @class, ' '), ' even ')][2]", 1, "filter then position [2]"],
|
||||
["//*[@id='heading' and @class='primary']", 1, "and"],
|
||||
["//*[@id='heading' or @id='p1']", 2, "or"],
|
||||
|
||||
// Sub-path predicates
|
||||
["//section[a]", 2, "section with a child"],
|
||||
["//section[count(span) = 2]", 1, "count() in predicate"],
|
||||
["//ul[count(li) = 5]", 1, "count() = 5"],
|
||||
["//tr[td[1]]", 3, "tr with first td (sub-step)"],
|
||||
["//tr[td/text() = 'Bob']", 1, "deep sub-path equality"],
|
||||
|
||||
// String functions
|
||||
["//*[starts-with(@id, 'link')]", 2, "starts-with"],
|
||||
["//*[normalize-space() = 'Hello World']", 1, "normalize-space() default arg"],
|
||||
["//*[normalize-space(.) = 'Item 1']", 1, "normalize-space(arg)"],
|
||||
["//*[concat(@id, '-x') = 'heading-x']", 1, "concat"],
|
||||
["//*[substring(@id, 1, 1) = 'p']", 2, "substring (3 args)"],
|
||||
["//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]", 1, "substring constrained"],
|
||||
["//p[translate(@id, 'p', 'q') = 'q1']", 1, "translate"],
|
||||
["//*[substring-before(@id, '1') = 'p']", 1, "substring-before"],
|
||||
["//*[substring-after(@id, 'lin') = 'k1']", 1, "substring-after"],
|
||||
|
||||
// Number functions
|
||||
["//tr[number(td[2]) > 28]", 2, "number() in compare"],
|
||||
["//tr[floor(number(td[2]) div 10) = 3]", 1, "floor + div"],
|
||||
["//tr[ceiling(number(td[2]) div 10) = 3]", 2, "ceiling + div"],
|
||||
["//tr[round(number(td[2]) div 10) = 3]", 2, "round half-up"],
|
||||
["//ul[sum(li/@data-len) = 0]", 1, "sum() over empty path -> 0"],
|
||||
|
||||
// Boolean functions
|
||||
["//p[boolean(@lang)]", 1, "boolean()"],
|
||||
["//*[false()]", 0, "false() always-false"],
|
||||
|
||||
// name() / local-name() — lowercased per decision #2
|
||||
["//*[name() = 'h1']", 1, "name() of context"],
|
||||
["//*[local-name() = 'h1']", 1, "local-name() of context"],
|
||||
|
||||
// id()
|
||||
["id('heading')", 1, "id()"],
|
||||
["id('heading p1')", 2, "id() multi-token"],
|
||||
["id(//em/parent::p/@id)", 1, "id() on attribute string-value"],
|
||||
|
||||
// Union
|
||||
["//h1 | //title", 2, "union (h1 + title)"],
|
||||
["//h1 | //*[@id='p1']", 2, "union of 2 different selectors"],
|
||||
["//*[@id='heading'] | //*[@id='heading']", 1, "self-union dedups"],
|
||||
|
||||
// Arithmetic
|
||||
["//li[position() + 1 = 3]", 1, "+"],
|
||||
["//li[position() - 1 = 0]", 1, "-"],
|
||||
["//li[position() * 2 = 4]", 1, "* multiply"],
|
||||
["//li[position() div 2 = 1]", 1, "div"],
|
||||
["//li[(position() mod 2) = 0]", 2, "mod"],
|
||||
|
||||
// Comparison — the header tr contains only th cells, so td[2] is an empty
|
||||
// node-set and number() of it is NaN; != against any number is then true (NaN equals nothing).
|
||||
["//tr[number(td[2]) = 30]", 1, "= numeric"],
|
||||
["//tr[number(td[2]) != 30]", 3, "!= numeric (header NaN passes)"],
|
||||
["//tr[number(td[2]) < 30]", 1, "< numeric"],
|
||||
["//tr[number(td[2]) <= 30]", 2, "<= numeric"],
|
||||
["//tr[number(td[2]) > 30]", 1, "> numeric"],
|
||||
["//tr[number(td[2]) >= 30]", 2, ">= numeric"],
|
||||
["//tr[td[2] = 30]", 1, "string-vs-number coercion"],
|
||||
["//tr[td[2] = '30']", 1, "string-vs-string equality"],
|
||||
|
||||
// Node tests
|
||||
["//comment()", 1, "comment() node test"],
|
||||
|
||||
// Capybara-style real-world expressions
|
||||
[".//a[contains(normalize-space(string(.)), 'Click me')]", 1, "Capybara link locator"],
|
||||
[".//input[(./@type = 'text')]", 1, "Capybara text-field"],
|
||||
[".//*[@id='heading']", 1, "find-by-id"],
|
||||
[".//li[contains(concat(' ', @class, ' '), ' even ')]", 2, "class contains pattern"],
|
||||
|
||||
// Tricky / edge
|
||||
["//*[@id='heading']/text()", 1, "text() child of element"],
|
||||
["//em/parent::p", 1, "parent of inline"],
|
||||
["//p[em]", 1, "p with em descendant"],
|
||||
["//p[not(em)]", 4, "p without em"],
|
||||
["//section[a/@href = '/foo']", 1, "deep attribute eq"],
|
||||
["//ul/li[last()][position() = last()]", 1, "double last()"],
|
||||
["//ul[string(count(li)) = '5']", 1, "string() of number"],
|
||||
["//body[count(//*[contains(@class, 'item')]) = 5]", 1, "nested count of contains()"],
|
||||
];
|
||||
|
||||
// Evaluate every case as an ordered snapshot against the document and
// compare the number of matched nodes with the expected count.
for (const [xp, expected, desc] of cases) {
  let r;
  try {
    r = document.evaluate(xp, document, null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
  } catch (e) {
    testing.fail(`[${desc}] ${xp} → threw: ${(e && e.message) || e}`);
    // `r` was never assigned. Skip the count check so the reported failure
    // is not followed by a TypeError from reading `r.snapshotLength`
    // (relevant if testing.fail records the failure without throwing).
    continue;
  }
  if (r.snapshotLength !== expected) {
    testing.fail(`[${desc}] ${xp} → got ${r.snapshotLength}, expected ${expected}`);
  }
}
|
||||
testing.expectEqual(91, cases.length);
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
103
src/browser/tests/xpath/xpath_evaluator.html
Normal file
103
src/browser/tests/xpath/xpath_evaluator.html
Normal file
@@ -0,0 +1,103 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<script src="../testing.js"></script>
|
||||
<h1 id=h>Hello</h1>
|
||||
<p id=p1>One</p>
|
||||
<p id=p2>Two</p>
|
||||
</body>
|
||||
|
||||
<script id=create_expression>
|
||||
{
|
||||
const expr = document.createExpression("//p", null);
|
||||
testing.expectEqual(true, expr instanceof XPathExpression);
|
||||
testing.expectEqual('function', typeof expr.evaluate);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=expression_evaluate>
|
||||
{
|
||||
const expr = document.createExpression("//p", null);
|
||||
const r = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(true, r instanceof XPathResult);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=expression_reuse_distinct_types>
|
||||
{
|
||||
// The cached AST should support multiple evaluations against
|
||||
// different requested types.
|
||||
const expr = document.createExpression("//p", null);
|
||||
|
||||
const snap = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(2, snap.snapshotLength);
|
||||
|
||||
const iter = expr.evaluate(document, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
|
||||
testing.expectEqual('p1', iter.iterateNext().id);
|
||||
testing.expectEqual('p2', iter.iterateNext().id);
|
||||
|
||||
const first = expr.evaluate(document, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual('p1', first.singleNodeValue.id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=expression_reuse_distinct_contexts>
|
||||
{
|
||||
// Re-evaluating with a different context node should rescope.
|
||||
const expr = document.createExpression("p", null);
|
||||
|
||||
const all = expr.evaluate(document.body, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(2, all.snapshotLength);
|
||||
|
||||
const empty = expr.evaluate(document.querySelector('h1'),
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(0, empty.snapshotLength);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=create_ns_resolver>
|
||||
{
|
||||
const resolver = document.createNSResolver(document);
|
||||
testing.expectEqual(document, resolver);
|
||||
const elt = document.createNSResolver(document.body);
|
||||
testing.expectEqual(document.body, elt);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=new_xpath_evaluator>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
testing.expectEqual(true, ev instanceof XPathEvaluator);
|
||||
testing.expectEqual('function', typeof ev.evaluate);
|
||||
testing.expectEqual('function', typeof ev.createExpression);
|
||||
testing.expectEqual('function', typeof ev.createNSResolver);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=evaluator_evaluate>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
const r = ev.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(true, r instanceof XPathResult);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=evaluator_create_expression>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
const expr = ev.createExpression("count(//p)", null);
|
||||
testing.expectEqual(true, expr instanceof XPathExpression);
|
||||
const r = expr.evaluate(document, XPathResult.NUMBER_TYPE, null);
|
||||
testing.expectEqual(2, r.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=evaluator_create_ns_resolver>
|
||||
{
|
||||
const ev = new XPathEvaluator();
|
||||
testing.expectEqual(document, ev.createNSResolver(document));
|
||||
}
|
||||
</script>
|
||||
171
src/browser/tests/xpath/xpath_perf.html
Normal file
171
src/browser/tests/xpath/xpath_perf.html
Normal file
@@ -0,0 +1,171 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>XPath perf benchmark</title>
|
||||
<script src="../testing.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<!--
|
||||
Micro-benchmark for the XPath evaluator.
|
||||
|
||||
Builds a deterministic DOM (TREE_SIZE = 500), then runs each query
|
||||
a few warmup iterations followed by ITERATIONS timed iterations.
|
||||
Reports mean µs/iter via console.warn (the test harness sets
|
||||
log level to .warn and silently drops info-level lines, so
|
||||
console.log doesn't surface; console.warn does). A snapshotLength
|
||||
mismatch fails the test loudly via testing.fail so a regression in
|
||||
result count can't be hidden by the timing line.
|
||||
|
||||
To run, uncomment the test in XPathResult.zig (bottom of the file), then:
|
||||
Run: make test F="#xpath_perf"
|
||||
|
||||
Query shapes target the optimization roadmap:
|
||||
//*[@id='x'] — global ID lookup (fast-path candidate)
|
||||
//tag[@id='x'] — typed ID lookup (fast-path candidate)
|
||||
//tag — pure descendant tag scan
|
||||
//*[@class='x'] — non-ID attribute filter (no fast path)
|
||||
(//tag)[1] / [last()] — early-exit candidates (iterator opt)
|
||||
count(//tag) — early-exit candidate (iterator opt)
|
||||
Plus a few that should NOT change so we can detect regressions.
|
||||
|
||||
Tuning: keep TREE_SIZE × ITERATIONS small enough that the test
|
||||
finishes in <1s on debug builds. Adjust ITERATIONS up if numbers
|
||||
are noisy.
|
||||
-->
|
||||
|
||||
<script id=xpath_perf_setup>
|
||||
{
|
||||
const TREE_SIZE = 500;
|
||||
const TAGS = ["div", "span", "p"];
|
||||
const CLASSES = ["alpha", "beta", "gamma"];
|
||||
const TARGET_INDEX = 250;
|
||||
|
||||
// Expose the construction parameters for the run script.
|
||||
window.__perf = { TREE_SIZE, TAGS, CLASSES, TARGET_INDEX };
|
||||
|
||||
// Decorrelate tag (period 3) and class (period 4) so that
|
||||
// //div[@class='alpha'] is not a degenerate restatement of //div.
|
||||
// Tag name for the i-th generated element: walks TAGS in order and wraps
// around once the end of the list is reached.
function tagOf(i) {
  const slot = i % TAGS.length;
  return TAGS[slot];
}
|
||||
// Class name for the i-th generated element. The index is first reduced
// modulo 4 and only then mapped into CLASSES, so the class sequence repeats
// every 4 elements rather than every CLASSES.length elements.
function classOf(i) {
  const phase = i % 4;
  return CLASSES[phase % CLASSES.length];
}
|
||||
|
||||
// Pre-compute expected counts so the assertions don't have to
|
||||
// re-derive the formula. Stored on window.__perf for the run script.
|
||||
const tags = Object.fromEntries(TAGS.map(t => [t, 0]));
|
||||
const classes = Object.fromEntries(CLASSES.map(c => [c, 0]));
|
||||
const cross = {};
|
||||
for (let i = 0; i < TREE_SIZE; i++) {
|
||||
const t = tagOf(i), c = classOf(i);
|
||||
tags[t]++;
|
||||
classes[c]++;
|
||||
const k = `${t}-${c}`;
|
||||
cross[k] = (cross[k] || 0) + 1;
|
||||
}
|
||||
window.__perf.tags = tags;
|
||||
window.__perf.classes = classes;
|
||||
window.__perf.cross = cross;
|
||||
window.__perf.targetTag = tagOf(TARGET_INDEX);
|
||||
|
||||
// Build the fixture body via innerHTML in one shot. Faster than
|
||||
// createElement loops because html5ever parses the whole string at
|
||||
// once and we don't pay per-element bridge crossings.
|
||||
const parts = [];
|
||||
for (let i = 0; i < TREE_SIZE; i++) {
|
||||
const id = (i === TARGET_INDEX) ? "target" : `n${i}`;
|
||||
parts.push(`<${tagOf(i)} id="${id}" class="${classOf(i)}">item ${i}</${tagOf(i)}>`);
|
||||
}
|
||||
// Use <main> as the wrapper so //div, //span, //p count only the
|
||||
// generated children (the wrapper itself doesn't share a tag with
|
||||
// any test query).
|
||||
const root = document.createElement("main");
|
||||
root.id = "perf_root";
|
||||
root.innerHTML = parts.join("");
|
||||
document.body.appendChild(root);
|
||||
|
||||
testing.expectEqual(TREE_SIZE, root.children.length);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=xpath_perf_run>
|
||||
{
|
||||
const ITERATIONS = 50;
|
||||
const WARMUP = 3;
|
||||
const { TREE_SIZE, tags, classes, cross, targetTag } = window.__perf;
|
||||
|
||||
const cases = [
|
||||
// --- ID lookups (fast-path #1 candidates) ---
|
||||
{ xp: "//*[@id='target']", expect: 1, label: "id-any" },
|
||||
{ xp: `//${targetTag}[@id='target']`, expect: 1, label: "id-typed-hit" },
|
||||
{ xp: "//div[@id='target']", expect: targetTag === "div" ? 1 : 0, label: "id-typed-miss" },
|
||||
|
||||
// --- Pure tag descendant ---
|
||||
{ xp: "//div", expect: tags.div, label: "tag-descendant" },
|
||||
{ xp: "//span", expect: tags.span, label: "tag-descendant-span" },
|
||||
{ xp: "//*", expect: null, label: "universal-descendant" },
|
||||
|
||||
// --- Attribute equality (no fast path planned) ---
|
||||
{ xp: "//*[@class='alpha']", expect: classes.alpha, label: "class-eq-any" },
|
||||
{ xp: "//div[@class='alpha']", expect: cross["div-alpha"] || 0, label: "class-eq-div" },
|
||||
|
||||
// --- Early-exit candidates (iterator optimization) ---
|
||||
{ xp: "(//div)[1]", expect: 1, label: "first-of-many" },
|
||||
{ xp: "(//div)[last()]", expect: 1, label: "last-of-many" },
|
||||
|
||||
// --- Functions in predicate (regression guard) ---
|
||||
{ xp: "//div[contains(@class,'alpha')]", expect: cross["div-alpha"] || 0, label: "contains-class" },
|
||||
{ xp: "//div[starts-with(@id,'n')]", expect: tags.div - (targetTag === "div" ? 1 : 0), label: "starts-with-id" },
|
||||
|
||||
// --- Counting (number result, iterator early-exit candidate) ---
|
||||
{ xp: "count(//div)", expect: tags.div, label: "count" },
|
||||
];
|
||||
|
||||
/**
 * Left-justifies a value in a fixed-width column of the report table.
 *
 * @param {*} s - Value to render; converted with String().
 * @param {number} w - Minimum column width in characters.
 * @returns {string} The text followed by enough spaces to reach `w`
 *   characters. Text already `w` characters or longer is returned
 *   unchanged (never truncated).
 */
function pad(s, w) {
  // padEnd pads with spaces by default and leaves longer strings intact,
  // which is exactly the hand-rolled behaviour it replaces, without
  // reassigning the parameter.
  return String(s).padEnd(w);
}
|
||||
|
||||
// Result extractor that handles both node-set and number results.
|
||||
// Evaluates `xp` against the document with XPathResult.ANY_TYPE and reduces
// the result to a single count:
//   - NUMBER_TYPE result   -> its numberValue (e.g. for count(...) queries)
//   - iterator result      -> how many nodes iterateNext() yields before
//                             returning a falsy value
//   - any other resultType -> null
function evalAndCount(xp) {
|
||||
const r = document.evaluate(xp, document, null, XPathResult.ANY_TYPE, null);
|
||||
if (r.resultType === XPathResult.NUMBER_TYPE) return r.numberValue;
|
||||
if (r.resultType === XPathResult.UNORDERED_NODE_ITERATOR_TYPE
|
||||
|| r.resultType === XPathResult.ORDERED_NODE_ITERATOR_TYPE) {
|
||||
let n = 0; while (r.iterateNext()) n++;
|
||||
return n;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// Accumulate output into one big string. Per-call console.warn lines
|
||||
// get clobbered by the test harness's interleaved prints; batching
|
||||
// into a single warn call survives the interleaving intact.
|
||||
const out = [];
|
||||
out.push(`tree_size=${TREE_SIZE} iterations=${ITERATIONS} warmup=${WARMUP}`);
|
||||
out.push(`${pad("label", 24)} ${pad("count", 8)} ${pad("µs/iter", 10)} xpath`);
|
||||
|
||||
for (const c of cases) {
|
||||
let actual = null;
|
||||
for (let i = 0; i < WARMUP; i++) {
|
||||
actual = evalAndCount(c.xp);
|
||||
}
|
||||
|
||||
if (c.expect !== null && actual !== c.expect) {
|
||||
testing.fail(`[xpath-perf] ${c.label} returned ${actual}, expected ${c.expect}`);
|
||||
}
|
||||
|
||||
// Time the full document.evaluate call. ANY_TYPE so we don't pay
|
||||
// snapshot allocation we wouldn't pay in the realistic
|
||||
// DOM.performSearch path either.
|
||||
const t0 = performance.now();
|
||||
for (let i = 0; i < ITERATIONS; i++) {
|
||||
document.evaluate(c.xp, document, null, XPathResult.ANY_TYPE, null);
|
||||
}
|
||||
const t1 = performance.now();
|
||||
const us_per_iter = ((t1 - t0) * 1000) / ITERATIONS;
|
||||
|
||||
out.push(`${pad(c.label, 24)} ${pad(actual ?? "-", 8)} ${pad(us_per_iter.toFixed(1), 10)} ${c.xp}`);
|
||||
}
|
||||
|
||||
console.warn("[xpath-perf]\n" + out.map(l => "[xpath-perf] " + l).join("\n"));
|
||||
|
||||
testing.expectEqual(true, true);
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
193
src/browser/tests/xpath/xpath_result.html
Normal file
193
src/browser/tests/xpath/xpath_result.html
Normal file
@@ -0,0 +1,193 @@
|
||||
<!DOCTYPE html>
|
||||
<body>
|
||||
<script src="../testing.js"></script>
|
||||
<h1 id=h>Hello</h1>
|
||||
<p id=p1>One</p>
|
||||
<p id=p2>Two</p>
|
||||
</body>
|
||||
|
||||
<script id=type_constants>
|
||||
{
|
||||
testing.expectEqual(0, XPathResult.ANY_TYPE);
|
||||
testing.expectEqual(1, XPathResult.NUMBER_TYPE);
|
||||
testing.expectEqual(2, XPathResult.STRING_TYPE);
|
||||
testing.expectEqual(3, XPathResult.BOOLEAN_TYPE);
|
||||
testing.expectEqual(4, XPathResult.UNORDERED_NODE_ITERATOR_TYPE);
|
||||
testing.expectEqual(5, XPathResult.ORDERED_NODE_ITERATOR_TYPE);
|
||||
testing.expectEqual(6, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE);
|
||||
testing.expectEqual(7, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE);
|
||||
testing.expectEqual(8, XPathResult.ANY_UNORDERED_NODE_TYPE);
|
||||
testing.expectEqual(9, XPathResult.FIRST_ORDERED_NODE_TYPE);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=instance_constants>
|
||||
{
|
||||
// Type constants are also exposed on instances.
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(7, r.ORDERED_NODE_SNAPSHOT_TYPE);
|
||||
testing.expectEqual(0, r.ANY_TYPE);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=number_value>
|
||||
{
|
||||
const r = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.NUMBER_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, r.resultType);
|
||||
testing.expectEqual(2, r.numberValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=string_value>
|
||||
{
|
||||
const r = document.evaluate("string(//h1)", document, null,
|
||||
XPathResult.STRING_TYPE, null);
|
||||
testing.expectEqual(XPathResult.STRING_TYPE, r.resultType);
|
||||
testing.expectEqual('Hello', r.stringValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=boolean_value>
|
||||
{
|
||||
const r = document.evaluate("count(//p) > 0", document, null,
|
||||
XPathResult.BOOLEAN_TYPE, null);
|
||||
testing.expectEqual(XPathResult.BOOLEAN_TYPE, r.resultType);
|
||||
testing.expectEqual(true, r.booleanValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=unordered_iterator>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.iterateNext().id);
|
||||
testing.expectEqual('p2', r.iterateNext().id);
|
||||
testing.expectEqual(null, r.iterateNext());
|
||||
testing.expectEqual(false, r.invalidIteratorState);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=ordered_iterator>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
|
||||
testing.expectEqual(XPathResult.ORDERED_NODE_ITERATOR_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.iterateNext().id);
|
||||
testing.expectEqual('p2', r.iterateNext().id);
|
||||
testing.expectEqual(null, r.iterateNext());
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=unordered_snapshot>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
testing.expectEqual('p2', r.snapshotItem(1).id);
|
||||
testing.expectEqual(null, r.snapshotItem(2));
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=ordered_snapshot>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
testing.expectEqual(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
|
||||
testing.expectEqual(2, r.snapshotLength);
|
||||
testing.expectEqual('p1', r.snapshotItem(0).id);
|
||||
testing.expectEqual('p2', r.snapshotItem(1).id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=any_unordered_node>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.ANY_UNORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(XPathResult.ANY_UNORDERED_NODE_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.singleNodeValue.id);
|
||||
|
||||
const empty = document.evaluate("//nope", document, null,
|
||||
XPathResult.ANY_UNORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(null, empty.singleNodeValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=first_ordered_node>
|
||||
{
|
||||
const r = document.evaluate("//p", document, null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
|
||||
testing.expectEqual(XPathResult.FIRST_ORDERED_NODE_TYPE, r.resultType);
|
||||
testing.expectEqual('p1', r.singleNodeValue.id);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=any_type_resolution>
|
||||
{
|
||||
// ANY_TYPE settles into the natural category of the result.
|
||||
const ns = document.evaluate("//p", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, ns.resultType);
|
||||
|
||||
const num = document.evaluate("count(//p)", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.NUMBER_TYPE, num.resultType);
|
||||
testing.expectEqual(2, num.numberValue);
|
||||
|
||||
const str = document.evaluate("string(//h1)", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.STRING_TYPE, str.resultType);
|
||||
testing.expectEqual('Hello', str.stringValue);
|
||||
|
||||
const bool = document.evaluate("true()", document, null,
|
||||
XPathResult.ANY_TYPE, null);
|
||||
testing.expectEqual(XPathResult.BOOLEAN_TYPE, bool.resultType);
|
||||
testing.expectEqual(true, bool.booleanValue);
|
||||
}
|
||||
</script>
|
||||
|
||||
<script id=type_mismatch_throws>
{
  const r = document.evaluate("//p", document, null,
    XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);

  // Wrong-typed accessors throw DOMException(InvalidStateError).
  // A snapshot result supports none of the members probed below, so
  // every probe is expected to throw.
  const probes = [
    () => r.numberValue,
    () => r.stringValue,
    () => r.booleanValue,
    () => r.singleNodeValue,
    () => r.iterateNext(),
  ];

  let n = 0;
  for (const probe of probes) {
    try {
      probe();
    } catch (e) {
      n++;
    }
  }
  testing.expectEqual(5, n);
}
</script>
|
||||
|
||||
<script id=node_set_for_scalar_throws>
{
  // Requesting a node-set type for a scalar expression rejects:
  // count(//p) yields a number, which cannot be served as a snapshot.
  const threw = (() => {
    try {
      document.evaluate("count(//p)", document, null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
      return false;
    } catch (e) {
      return true;
    }
  })();
  testing.expectEqual(true, threw);
}
</script>
|
||||
|
||||
<script id=iterator_exhaustion>
{
  const r = document.evaluate("//p", document, null,
    XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);

  // The fixture holds three <p> elements; all of them must be yielded,
  // in document order, before the iterator reports exhaustion.
  testing.expectEqual('p1', r.iterateNext().id);
  testing.expectEqual('p2', r.iterateNext().id);
  testing.expectEqual('p3', r.iterateNext().id);
  testing.expectEqual(null, r.iterateNext());
  // Re-calling on an exhausted iterator stays at null.
  testing.expectEqual(null, r.iterateNext());
}
</script>
|
||||
@@ -35,6 +35,8 @@ const DOMImplementation = @import("DOMImplementation.zig");
|
||||
const StyleSheetList = @import("css/StyleSheetList.zig");
|
||||
const FontFaceSet = @import("css/FontFaceSet.zig");
|
||||
const Selection = @import("Selection.zig");
|
||||
const XPathResult = @import("XPathResult.zig");
|
||||
const XPathExpression = @import("XPathExpression.zig");
|
||||
|
||||
pub const XMLDocument = @import("XMLDocument.zig");
|
||||
pub const HTMLDocument = @import("HTMLDocument.zig");
|
||||
@@ -412,6 +414,44 @@ pub fn createNodeIterator(_: *const Document, root: *Node, what_to_show: ?js.Val
|
||||
return DOMNodeIterator.init(root, try whatToShow(what_to_show), filter, frame);
|
||||
}
|
||||
|
||||
/// `Document.evaluate(expression, contextNode, resolver, type, result)`.
///
/// Parses and evaluates `expression` through `XPathResult.fromExpression`.
/// `resolver` and `result` are accepted and discarded (no-ops in HTML
/// mode — decision #2). An absent `result_type` is treated as
/// `XPathResult.ANY_TYPE`.
pub fn evaluate(
    self: *Document,
    expression: []const u8,
    context_node: ?*Node,
    resolver: ?js.Function,
    result_type: ?u16,
    result: ?*XPathResult,
    frame: *Frame,
) !*XPathResult {
    _ = result;
    _ = resolver;

    // A null or missing context node falls back to the document itself,
    // matching the polyfill (decision #2). Firefox throws TypeError when
    // the argument is *missing*, but the bridge cannot tell "missing"
    // apart from an explicit null here, so polyfill parity wins.
    const effective_context = context_node orelse self.asNode();
    const effective_type = result_type orelse XPathResult.ANY_TYPE;

    return XPathResult.fromExpression(expression, effective_context, effective_type, frame);
}
|
||||
|
||||
/// `Document.createExpression(expression, resolver)`: builds an
/// `XPathExpression` from `expression` via `XPathExpression.init`.
/// The namespace `resolver` is accepted but not used.
pub fn createExpression(
    _: *const Document,
    expression: []const u8,
    resolver: ?js.Function,
    frame: *Frame,
) !*XPathExpression {
    _ = resolver; // intentionally ignored
    return XPathExpression.init(expression, frame);
}
|
||||
|
||||
/// `Document.createNSResolver(node)`: passthrough that hands back the
/// very node it was given.
pub fn createNSResolver(_: *const Document, node: *Node) ?*Node {
    return node;
}
|
||||
|
||||
fn whatToShow(value_: ?js.Value) !u32 {
|
||||
const value = value_ orelse return 4294967295; // show all when undefined
|
||||
if (value.isUndefined()) {
|
||||
@@ -1053,6 +1093,9 @@ pub const JsApi = struct {
|
||||
pub const createEvent = bridge.function(Document.createEvent, .{ .dom_exception = true });
|
||||
pub const createTreeWalker = bridge.function(Document.createTreeWalker, .{});
|
||||
pub const createNodeIterator = bridge.function(Document.createNodeIterator, .{});
|
||||
pub const evaluate = bridge.function(Document.evaluate, .{ .dom_exception = true });
|
||||
pub const createExpression = bridge.function(Document.createExpression, .{ .dom_exception = true });
|
||||
pub const createNSResolver = bridge.function(Document.createNSResolver, .{});
|
||||
pub const getElementById = bridge.function(_getElementById, .{});
|
||||
fn _getElementById(self: *Document, value_: ?js.Value, frame: *Frame) !?*Element {
|
||||
const value = value_ orelse return null;
|
||||
@@ -1113,3 +1156,7 @@ const testing = @import("../../testing.zig");
|
||||
// Runs the "document" HTML fixture through the shared HTML test runner.
test "WebApi: Document" {
    try testing.htmlRunner("document", .{});
}
|
||||
|
||||
// Runs the `Document.evaluate` fixture (xpath/document_evaluate.html)
// through the shared HTML test runner.
test "WebApi: Document.evaluate" {
    try testing.htmlRunner("xpath/document_evaluate.html", .{});
}
|
||||
|
||||
97
src/browser/webapi/XPathEvaluator.zig
Normal file
97
src/browser/webapi/XPathEvaluator.zig
Normal file
@@ -0,0 +1,97 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! WHATWG `XPathEvaluator` — a stateless factory for XPath evaluation.
|
||||
//! Mirrors `Document.evaluate` / `Document.createExpression` /
|
||||
//! `Document.createNSResolver` so an explicit
|
||||
//! `new XPathEvaluator()` instance can be used in place of the
|
||||
//! document.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const js = @import("../js/js.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
|
||||
const Node = @import("Node.zig");
|
||||
const XPathResult = @import("XPathResult.zig");
|
||||
const XPathExpression = @import("XPathExpression.zig");
|
||||
|
||||
const XPathEvaluator = @This();
|
||||
|
||||
// Padding to avoid zero-size struct identity_map collisions (matches
|
||||
// the convention in ResizeObserver.zig).
|
||||
_pad: bool = false,
|
||||
|
||||
/// Constructor behind `new XPathEvaluator()`. The evaluator carries no
/// state, so every field keeps its default value.
pub fn init() XPathEvaluator {
    return XPathEvaluator{};
}
|
||||
|
||||
/// `XPathEvaluator.evaluate()`: parse and run `expression` against
/// `context_node` in a single call.
///
/// `resolver` and `result` exist only for API compatibility: the
/// namespace resolver is accepted-and-ignored (HTML mode — decision #2),
/// and result reuse is a no-op because `XPathResult.fromExpression`
/// always allocates a fresh instance. An absent `requested_type` is
/// treated as `XPathResult.ANY_TYPE`.
pub fn evaluate(
    _: *const XPathEvaluator,
    expression: []const u8,
    context_node: *Node,
    resolver: ?js.Function,
    requested_type: ?u16,
    result: ?*XPathResult,
    frame: *Frame,
) !*XPathResult {
    _ = result;
    _ = resolver;

    const wanted_type = requested_type orelse XPathResult.ANY_TYPE;
    return XPathResult.fromExpression(expression, context_node, wanted_type, frame);
}
|
||||
|
||||
/// `XPathEvaluator.createExpression()`: builds an `XPathExpression` from
/// `expression` via `XPathExpression.init`. The namespace `resolver` is
/// accepted but not used.
pub fn createExpression(
    _: *const XPathEvaluator,
    expression: []const u8,
    resolver: ?js.Function,
    frame: *Frame,
) !*XPathExpression {
    _ = resolver; // intentionally ignored
    return XPathExpression.init(expression, frame);
}
|
||||
|
||||
/// `XPathEvaluator.createNSResolver()`: HTML-mode passthrough. The
/// WHATWG IDL takes a Node and returns an `XPathNSResolver`; in practice
/// the input node is reused, so it is returned unchanged.
pub fn createNSResolver(_: *const XPathEvaluator, node: *Node) ?*Node {
    return node;
}
|
||||
|
||||
/// JavaScript binding table for `XPathEvaluator`.
pub const JsApi = struct {
    pub const bridge = js.Bridge(XPathEvaluator);

    /// Class-level metadata read by the bridge.
    pub const Meta = struct {
        pub const name = "XPathEvaluator";
        pub const prototype_chain = bridge.prototypeChain();
        pub var class_id: bridge.ClassId = undefined;
        pub const empty_with_no_proto = true;
    };

    // `new XPathEvaluator()`.
    pub const constructor = bridge.constructor(XPathEvaluator.init, .{});

    // The two methods that parse an expression are registered with
    // `.dom_exception = true`; the resolver passthrough is not.
    pub const evaluate = bridge.function(XPathEvaluator.evaluate, .{ .dom_exception = true });
    pub const createExpression = bridge.function(XPathEvaluator.createExpression, .{ .dom_exception = true });
    pub const createNSResolver = bridge.function(XPathEvaluator.createNSResolver, .{});
};
|
||||
|
||||
const testing = @import("../../testing.zig");
|
||||
|
||||
// Runs the xpath/xpath_evaluator.html fixture through the shared HTML
// test runner.
test "WebApi: XPathEvaluator + XPathExpression" {
    try testing.htmlRunner("xpath/xpath_evaluator.html", .{});
}
|
||||
105
src/browser/webapi/XPathExpression.zig
Normal file
105
src/browser/webapi/XPathExpression.zig
Normal file
@@ -0,0 +1,105 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! WHATWG `XPathExpression` — a parsed XPath expression cached for
|
||||
//! repeated evaluation. The parsed AST lives in this object's per-
|
||||
//! instance arena (long-lived); each `evaluate()` call gets a fresh
|
||||
//! arena for its own result data so multiple evaluations don't grow
|
||||
//! the AST arena.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const js = @import("../js/js.zig");
|
||||
const Page = @import("../Page.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
|
||||
const Node = @import("Node.zig");
|
||||
const XPathResult = @import("XPathResult.zig");
|
||||
|
||||
const xpath = struct {
|
||||
const Ast = @import("../xpath/ast.zig");
|
||||
const Parser = @import("../xpath/Parser.zig");
|
||||
const Evaluator = @import("../xpath/Evaluator.zig");
|
||||
};
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const XPathExpression = @This();
|
||||
|
||||
_rc: lp.RC(u8) = .{},
|
||||
_arena: Allocator,
|
||||
_expr: *const xpath.Ast.Expr,
|
||||
|
||||
/// Parse `expression` once and wrap the resulting AST in a new
/// `XPathExpression`.
///
/// The instance, its AST and a private copy of the expression text all
/// live in one `.tiny` arena obtained from `frame`; the arena is handed
/// back to the frame if any step fails.
pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression {
    const arena = try frame.getArena(.tiny, "XPathExpression");
    errdefer frame.releaseArena(arena);

    // The AST keeps slices into the text it was parsed from (literals,
    // names, var refs, function names). `expression` sits in the JS
    // call_arena, which is reclaimed when the top-level call returns, so
    // the parser is fed a copy owned by our long-lived arena instead.
    const source_copy = try arena.dupe(u8, expression);
    const parsed = try xpath.Parser.parse(arena, source_copy);

    const self = try arena.create(XPathExpression);
    self.* = .{
        ._arena = arena,
        ._expr = parsed,
    };
    return self;
}
|
||||
|
||||
/// `XPathExpression.evaluate()`: run the cached AST against
/// `context_node` and wrap the outcome in a new `XPathResult`.
///
/// Each call takes a fresh `.medium` arena for the result data; its
/// ownership passes to the returned `XPathResult`, or it is released
/// again if evaluation or wrapping fails. An absent `requested_type` is
/// treated as `XPathResult.ANY_TYPE`.
pub fn evaluate(
    self: *XPathExpression,
    context_node: *Node,
    requested_type: ?u16,
    result: ?*XPathResult,
    frame: *Frame,
) !*XPathResult {
    // WHATWG lets callers pass an existing XPathResult to repopulate.
    // That parameter is accepted-and-ignored: a fresh result is always
    // allocated, which matches every modern browser's effective behavior.
    _ = result;

    const wanted_type = requested_type orelse XPathResult.ANY_TYPE;

    const result_arena = try frame.getArena(.medium, "XPathResult");
    errdefer frame.releaseArena(result_arena);

    const evaluated = try xpath.Evaluator.evaluate(result_arena, self._expr, context_node, frame);
    return XPathResult.fromResult(result_arena, wanted_type, evaluated);
}
|
||||
|
||||
/// Hands the per-instance arena back to `page`. The instance itself was
/// allocated from that arena in `init`.
pub fn deinit(self: *XPathExpression, page: *Page) void {
    const arena = self._arena;
    page.releaseArena(arena);
}
|
||||
|
||||
/// Takes one additional reference on this expression by delegating to
/// the embedded `_rc` counter.
pub fn acquireRef(self: *XPathExpression) void {
    self._rc.acquire();
}
|
||||
|
||||
/// Drops one reference by delegating to the embedded `_rc` counter,
/// which receives both the instance and `page`.
// NOTE(review): presumably `_rc.release` invokes `deinit` once the count
// reaches zero — confirm against `lp.RC`.
pub fn releaseRef(self: *XPathExpression, page: *Page) void {
    self._rc.release(self, page);
}
|
||||
|
||||
/// JavaScript binding table for `XPathExpression`. No constructor is
/// registered here; the visible producers are
/// `Document.createExpression` and `XPathEvaluator.createExpression`.
pub const JsApi = struct {
    pub const bridge = js.Bridge(XPathExpression);

    /// Class-level metadata read by the bridge.
    pub const Meta = struct {
        pub const name = "XPathExpression";
        pub const prototype_chain = bridge.prototypeChain();
        pub var class_id: bridge.ClassId = undefined;
    };

    // Registered with `.dom_exception = true`.
    pub const evaluate = bridge.function(XPathExpression.evaluate, .{ .dom_exception = true });
};
|
||||
288
src/browser/webapi/XPathResult.zig
Normal file
288
src/browser/webapi/XPathResult.zig
Normal file
@@ -0,0 +1,288 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! WHATWG `XPathResult` (full surface, all 10 type constants — decision
|
||||
//! #4). Wraps the evaluator's `result.Result` for JS consumption:
|
||||
//! coerces to the requested result type at construction, exposes the
|
||||
//! type-tagged accessors, and serves the iterator/snapshot APIs.
|
||||
//!
|
||||
//! Lifetime model: each `XPathResult` owns a per-instance arena
|
||||
//! (`getArena(.medium, ...)`) that holds both the struct and the result
|
||||
//! data (node-set slice, formatted strings). The arena is released in
|
||||
//! `deinit` once the JS wrapper's refcount hits zero.
|
||||
//!
|
||||
//! Type-mismatch accessor calls return `error.InvalidStateError` —
|
||||
//! translated to a `DOMException` by `bridge.function(.., .{
|
||||
//! .dom_exception = true })`. The WHATWG IDL technically specifies
|
||||
//! `TypeError` for type mismatches, but `InvalidStateError` is what
|
||||
//! decision #4 captures and what most legacy XPath consumers expect.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const js = @import("../js/js.zig");
|
||||
const Page = @import("../Page.zig");
|
||||
const Frame = @import("../Frame.zig");
|
||||
|
||||
const Node = @import("Node.zig");
|
||||
|
||||
// XPath runtime helpers. Aliased to keep the cross-directory imports
|
||||
// readable when both modules expose a `Result` type.
|
||||
const xpath = struct {
|
||||
const result = @import("../xpath/result.zig");
|
||||
const Parser = @import("../xpath/Parser.zig");
|
||||
const Evaluator = @import("../xpath/Evaluator.zig");
|
||||
};
|
||||
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const XPathResult = @This();
|
||||
|
||||
// WHATWG result-type constants.
//
// ANY_TYPE is only ever a *request*: when a result is built it is
// replaced by one of four concrete categories — NUMBER, STRING, BOOLEAN
// or UNORDERED_NODE_ITERATOR — according to what the expression
// actually produced.
pub const ANY_TYPE: u16 = 0;

// Scalar categories.
pub const NUMBER_TYPE: u16 = 1;
pub const STRING_TYPE: u16 = 2;
pub const BOOLEAN_TYPE: u16 = 3;

// Node-set categories (iterators, snapshots, single node).
pub const UNORDERED_NODE_ITERATOR_TYPE: u16 = 4;
pub const ORDERED_NODE_ITERATOR_TYPE: u16 = 5;
pub const UNORDERED_NODE_SNAPSHOT_TYPE: u16 = 6;
pub const ORDERED_NODE_SNAPSHOT_TYPE: u16 = 7;
pub const ANY_UNORDERED_NODE_TYPE: u16 = 8;
pub const FIRST_ORDERED_NODE_TYPE: u16 = 9;
|
||||
|
||||
/// Payload stored in an `XPathResult`. Which variant is active follows
/// from the result's resolved `_type`: the three scalar types use the
/// matching scalar variant, every node-set type uses `nodes`.
const Value = union(enum) {
    number: f64,
    string: []const u8,
    boolean: bool,
    /// Matched nodes, shared by the iterator, snapshot and single-node
    /// accessors.
    nodes: []const *Node,
};
|
||||
|
||||
_rc: lp.RC(u8) = .{},
|
||||
_arena: Allocator,
|
||||
_type: u16,
|
||||
_value: Value,
|
||||
_iter_pos: usize = 0,
|
||||
|
||||
// ----- constructors -----
|
||||
|
||||
/// Parse, evaluate and wrap in a single step. This is the entry point
/// for `Document.evaluate` and `XPathEvaluator.evaluate`.
///
/// One `.medium` arena taken from `frame` backs the copied expression
/// text, the AST, the result data and the `XPathResult` struct itself.
/// The arena is returned to the frame if any step fails.
pub fn fromExpression(
    expression: []const u8,
    context_node: *Node,
    requested_type: u16,
    frame: *Frame,
) !*XPathResult {
    const arena = try frame.getArena(.medium, "XPathResult");
    errdefer frame.releaseArena(arena);

    // The AST keeps slices into the text it was parsed from (literals,
    // names, var refs, function names). `expression` sits in the JS
    // call_arena, which is reclaimed when the top-level call returns, so
    // parse a copy owned by our long-lived arena instead.
    const source_copy = try arena.dupe(u8, expression);
    const parsed = try xpath.Parser.parse(arena, source_copy);

    const evaluated = try xpath.Evaluator.evaluate(arena, parsed, context_node, frame);
    return fromResult(arena, requested_type, evaluated);
}
|
||||
|
||||
/// Wrap an already-evaluated `result.Result` in an `XPathResult`.
///
/// Ownership of `arena` moves to the new object, which releases it in
/// `deinit`. `XPathExpression.evaluate` calls this directly because it
/// keeps its own AST cache and only needs a fresh arena for the result.
///
/// Coercion rules:
///   * `ANY_TYPE` keeps the evaluator's value as-is and resolves the
///     type to NUMBER, STRING, BOOLEAN or UNORDERED_NODE_ITERATOR.
///   * NUMBER / STRING / BOOLEAN convert the value with the matching
///     `xpath.result.to*` helper.
///   * Every node-set type requires a node-set value.
///
/// Returns `error.InvalidStateError` for an unknown `requested_type` or
/// when a node-set type is requested for a non-node-set value.
pub fn fromResult(
    arena: Allocator,
    requested_type: u16,
    result: xpath.result.Result,
) !*XPathResult {
    const payload: Value = switch (requested_type) {
        ANY_TYPE => switch (result) {
            .number => |n| .{ .number = n },
            .string => |s| .{ .string = s },
            .boolean => |b| .{ .boolean = b },
            .node_set => |ns| .{ .nodes = ns },
        },
        NUMBER_TYPE => .{ .number = try xpath.result.toNumber(arena, result) },
        STRING_TYPE => .{ .string = try xpath.result.toString(arena, result) },
        BOOLEAN_TYPE => .{ .boolean = xpath.result.toBoolean(result) },
        // The six node-set types are the contiguous codes 4 through 9.
        UNORDERED_NODE_ITERATOR_TYPE...FIRST_ORDERED_NODE_TYPE => nodes: {
            // Requesting a node-set type for a non-node-set expression.
            // WHATWG specifies TypeError, but DOMException.fromError has
            // no TypeError mapping (it would surface as a plain JS
            // Error), so unify on InvalidStateError per the project plan.
            if (result != .node_set) return error.InvalidStateError;
            break :nodes .{ .nodes = result.node_set };
        },
        else => return error.InvalidStateError,
    };

    // Only ANY_TYPE gets rewritten; an explicit request is kept verbatim.
    const resolved_type: u16 = if (requested_type != ANY_TYPE) requested_type else switch (payload) {
        .number => NUMBER_TYPE,
        .string => STRING_TYPE,
        .boolean => BOOLEAN_TYPE,
        .nodes => UNORDERED_NODE_ITERATOR_TYPE,
    };

    const self = try arena.create(XPathResult);
    self.* = .{
        ._arena = arena,
        ._type = resolved_type,
        ._value = payload,
    };
    return self;
}
|
||||
|
||||
// ----- lifecycle -----
|
||||
|
||||
/// Hands the per-instance arena back to `page`. That arena holds both
/// this struct and its result data.
pub fn deinit(self: *XPathResult, page: *Page) void {
    const arena = self._arena;
    page.releaseArena(arena);
}
|
||||
|
||||
/// Takes one additional reference on this result by delegating to the
/// embedded `_rc` counter.
pub fn acquireRef(self: *XPathResult) void {
    self._rc.acquire();
}
|
||||
|
||||
/// Drops one reference by delegating to the embedded `_rc` counter,
/// which receives both the instance and `page`.
// NOTE(review): presumably `_rc.release` invokes `deinit` once the count
// reaches zero — confirm against `lp.RC`.
pub fn releaseRef(self: *XPathResult, page: *Page) void {
    self._rc.release(self, page);
}
|
||||
|
||||
// ----- accessors -----
|
||||
|
||||
/// Getter for `resultType`: the resolved type code stored at
/// construction. It is never `ANY_TYPE` for a result built by
/// `fromResult`.
fn getResultType(self: *const XPathResult) u16 {
    return self._type;
}
|
||||
|
||||
/// Getter for `numberValue`. Only valid on a NUMBER_TYPE result; any
/// other type yields `error.InvalidStateError`.
fn getNumberValue(self: *const XPathResult) !f64 {
    return switch (self._type) {
        NUMBER_TYPE => self._value.number,
        else => error.InvalidStateError,
    };
}
|
||||
|
||||
/// Getter for `stringValue`. Only valid on a STRING_TYPE result; any
/// other type yields `error.InvalidStateError`.
fn getStringValue(self: *const XPathResult) ![]const u8 {
    return switch (self._type) {
        STRING_TYPE => self._value.string,
        else => error.InvalidStateError,
    };
}
|
||||
|
||||
/// Getter for `booleanValue`. Only valid on a BOOLEAN_TYPE result; any
/// other type yields `error.InvalidStateError`.
fn getBooleanValue(self: *const XPathResult) !bool {
    return switch (self._type) {
        BOOLEAN_TYPE => self._value.boolean,
        else => error.InvalidStateError,
    };
}
|
||||
|
||||
/// Getter for `singleNodeValue`. Valid for ANY_UNORDERED_NODE_TYPE and
/// FIRST_ORDERED_NODE_TYPE results only; other types yield
/// `error.InvalidStateError`. Returns the first stored node, or null
/// when nothing matched.
fn getSingleNodeValue(self: *const XPathResult) !?*Node {
    switch (self._type) {
        ANY_UNORDERED_NODE_TYPE, FIRST_ORDERED_NODE_TYPE => {},
        else => return error.InvalidStateError,
    }
    const nodes = self._value.nodes;
    if (nodes.len > 0) return nodes[0];
    return null;
}
|
||||
|
||||
/// Getter for `snapshotLength`. Valid for the two snapshot types only;
/// other types yield `error.InvalidStateError`. Returns the number of
/// stored nodes.
fn getSnapshotLength(self: *const XPathResult) !u32 {
    switch (self._type) {
        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
        else => return error.InvalidStateError,
    }
    const count: u32 = @intCast(self._value.nodes.len);
    return count;
}
|
||||
|
||||
/// Getter for `invalidIteratorState`; constantly false.
///
/// Live mutation tracking is not implemented: the result holds a frozen
/// slice of node pointers, so DOM edits made during traversal never
/// "invalidate" the iterator. This matches the polyfill, which is
/// snapshot-only.
fn getInvalidIteratorState(_: *const XPathResult) bool {
    return false;
}
|
||||
|
||||
// ----- methods -----
|
||||
|
||||
/// `iterateNext()`: return the node at the cursor and advance by one.
///
/// Valid for the two iterator types only; other types yield
/// `error.InvalidStateError`. Once every node has been handed out the
/// cursor stops moving and each further call returns null.
pub fn iterateNext(self: *XPathResult) !?*Node {
    switch (self._type) {
        UNORDERED_NODE_ITERATOR_TYPE, ORDERED_NODE_ITERATOR_TYPE => {},
        else => return error.InvalidStateError,
    }

    const nodes = self._value.nodes;
    const cursor = self._iter_pos;
    if (cursor < nodes.len) {
        self._iter_pos = cursor + 1;
        return nodes[cursor];
    }
    return null;
}
|
||||
|
||||
/// `snapshotItem(index)`: random access into a snapshot result.
///
/// Valid for the two snapshot types only; other types yield
/// `error.InvalidStateError`. An out-of-range `index` returns null
/// rather than failing.
pub fn snapshotItem(self: *const XPathResult, index: u32) !?*Node {
    switch (self._type) {
        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
        else => return error.InvalidStateError,
    }

    const nodes = self._value.nodes;
    if (index < nodes.len) return nodes[index];
    return null;
}
|
||||
|
||||
// ----- JS bridge -----
|
||||
|
||||
/// JavaScript binding table for `XPathResult`.
pub const JsApi = struct {
    pub const bridge = js.Bridge(XPathResult);

    /// Class-level metadata read by the bridge.
    pub const Meta = struct {
        pub const name = "XPathResult";
        pub const prototype_chain = bridge.prototypeChain();
        pub var class_id: bridge.ClassId = undefined;
    };

    // Result-type constants. The WHATWG IDL exposes them both statically
    // (on the constructor) and on instances; `template = true` makes
    // them class-level so `XPathResult.ORDERED_NODE_SNAPSHOT_TYPE` works.
    pub const ANY_TYPE = bridge.property(XPathResult.ANY_TYPE, .{ .template = true });
    pub const NUMBER_TYPE = bridge.property(XPathResult.NUMBER_TYPE, .{ .template = true });
    pub const STRING_TYPE = bridge.property(XPathResult.STRING_TYPE, .{ .template = true });
    pub const BOOLEAN_TYPE = bridge.property(XPathResult.BOOLEAN_TYPE, .{ .template = true });
    pub const UNORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
    pub const ORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.ORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
    pub const UNORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
    pub const ORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
    pub const ANY_UNORDERED_NODE_TYPE = bridge.property(XPathResult.ANY_UNORDERED_NODE_TYPE, .{ .template = true });
    pub const FIRST_ORDERED_NODE_TYPE = bridge.property(XPathResult.FIRST_ORDERED_NODE_TYPE, .{ .template = true });

    // Read-only accessors (no setter is registered). The getters that
    // can fail with InvalidStateError carry `.dom_exception = true`.
    pub const resultType = bridge.accessor(XPathResult.getResultType, null, .{});
    pub const numberValue = bridge.accessor(XPathResult.getNumberValue, null, .{ .dom_exception = true });
    pub const stringValue = bridge.accessor(XPathResult.getStringValue, null, .{ .dom_exception = true });
    pub const booleanValue = bridge.accessor(XPathResult.getBooleanValue, null, .{ .dom_exception = true });
    pub const singleNodeValue = bridge.accessor(XPathResult.getSingleNodeValue, null, .{ .dom_exception = true });
    pub const snapshotLength = bridge.accessor(XPathResult.getSnapshotLength, null, .{ .dom_exception = true });
    pub const invalidIteratorState = bridge.accessor(XPathResult.getInvalidIteratorState, null, .{});

    // Methods.
    pub const iterateNext = bridge.function(XPathResult.iterateNext, .{ .dom_exception = true });
    pub const snapshotItem = bridge.function(XPathResult.snapshotItem, .{ .dom_exception = true });
};
|
||||
|
||||
const testing = @import("../../testing.zig");
|
||||
|
||||
// Runs the xpath/xpath_result.html fixture through the shared HTML test
// runner.
test "WebApi: XPathResult" {
    try testing.htmlRunner("xpath/xpath_result.html", .{});
}
|
||||
|
||||
// Runs the xpath/xpath_conformance.html fixture through the shared HTML
// test runner.
test "WebApi: XPath conformance" {
    try testing.htmlRunner("xpath/xpath_conformance.html", .{});
}
|
||||
|
||||
// This uses console.warn, uncomment if you want to run it
|
||||
// test "WebApi: XPath perf" {
|
||||
// try testing.htmlRunner("xpath/xpath_perf.html", .{});
|
||||
// }
|
||||
987
src/browser/xpath/Evaluator.zig
Normal file
987
src/browser/xpath/Evaluator.zig
Normal file
@@ -0,0 +1,987 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 evaluator — runs an `ast.Expr` against a context node and
|
||||
//! produces a `Result`. The evaluator allocates intermediate values
|
||||
//! (node-set slices, formatted numbers, materialized attribute nodes)
|
||||
//! into the caller's arena. The context `Frame` is needed for
|
||||
//! `getElementById` and to materialize attributes (the attribute axis
|
||||
//! returns full `Attribute` nodes so the result is `*Node`-uniform).
|
||||
//!
|
||||
//! Document-order sort happens once at the public boundary
|
||||
//! (`evaluate()`); intermediate step results stay in axis order so
|
||||
//! reverse-axis positional predicates evaluate against proximity.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const Node = @import("../webapi/Node.zig");
|
||||
|
||||
const ast = @import("ast.zig");
|
||||
const Parser = @import("Parser.zig");
|
||||
const result = @import("result.zig");
|
||||
const functions = @import("functions.zig");
|
||||
|
||||
const Frame = lp.Frame;
|
||||
const Element = Node.Element;
|
||||
const Document = Node.Document;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const Evaluator = @This();
|
||||
|
||||
/// Every error the evaluator can return.
pub const Error = error{
    OutOfMemory,
    WriteFailed,
    // Raised while materializing Attribute nodes: `Entry.toAttribute`
    // goes through `String.dupe`, which enforces a length limit. The
    // polyfill never runs into this because JS strings are unbounded,
    // but Lightpanda's `String` type caps at u32::MAX bytes, so the
    // error is propagated and callers can surface a DOM exception.
    StringTooLarge,
    UnknownFunction,
    UnionRequiresNodeSets,
};
|
||||
|
||||
arena: Allocator,
|
||||
frame: *Frame,
|
||||
|
||||
/// Public entry point: evaluate `expr` with `context_node` as the
/// context node, at context position 1 of a context of size 1.
///
/// Intermediate values are allocated from `arena`. A node-set result is
/// sorted into document order before it is returned, per XPath spec
/// §3.3; scalar results are returned untouched.
pub fn evaluate(arena: Allocator, expr: *const ast.Expr, context_node: *Node, frame: *Frame) Error!result.Result {
    var evaluator: Evaluator = .{ .arena = arena, .frame = frame };
    const value = try evaluator.evalExpr(expr, context_node, 1, 1);
    switch (value) {
        // The slice is exposed as const, so the in-place sort needs the
        // const qualifier removed.
        .node_set => |nodes| sortDocOrder(@constCast(nodes)),
        else => {},
    }
    return value;
}
|
||||
|
||||
/// Errors `searchAll` can return: evaluation errors plus parse errors.
pub const SearchError = Error || Parser.Error;

/// Convenience wrapper for `DOM.performSearch`: parse `expression`,
/// evaluate it with `root` as the context node, and return the matching
/// nodes.
///
/// A top-level scalar expression yields an empty slice (decision #3 —
/// these APIs exist to find nodes, not to run arbitrary computation).
pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: *Frame) SearchError![]const *Node {
    const parsed = try Parser.parse(arena, expression);
    const value = try evaluate(arena, parsed, root, frame);
    if (value == .node_set) return value.node_set;
    return &.{};
}
|
||||
|
||||
// ----- AST evaluation -----
|
||||
|
||||
/// Evaluate one AST node. `ctx` is the context node; `pos` and `size`
/// are the context position and context size, forwarded to every
/// sub-evaluation except plain paths, which receive only `ctx`.
fn evalExpr(self: *Evaluator, expr: *const ast.Expr, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    switch (expr.*) {
        // Constants carried directly by the AST.
        .number => |n| return .{ .number = n },
        .literal => |s| return .{ .string = s },
        // Variable references always yield the empty string (decision #3 stub).
        .var_ref => return .{ .string = "" },
        // Unary minus: evaluate the operand, coerce to number, negate.
        .neg => |inner| {
            const operand = try self.evalExpr(inner, ctx, pos, size);
            const as_number = try result.toNumber(self.arena, operand);
            return .{ .number = -as_number };
        },
        .binop => |bo| return self.evalBinop(bo, ctx, pos, size),
        .path => |p| return self.evalPath(p, ctx),
        .filter_path => |fp| return self.evalFilterPath(fp, ctx, pos, size),
        .filter => |f| return self.evalFilter(f, ctx, pos, size),
        .fn_call => |fc| return self.evalFnCall(fc, ctx, pos, size),
    }
}
|
||||
|
||||
/// Evaluate a location path starting from `ctx`.
///
/// Two fast paths are tried first, in order; if neither applies the
/// steps are applied one after another, each step consuming the node-set
/// produced by the previous one.
fn evalPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!result.Result {
    if (try self.tryIdLookupFastPath(path, ctx)) |fast| return fast;
    if (try self.tryFusedDescendantFastPath(path, ctx)) |fast| return fast;

    // Absolute paths start at the owning document. The context node is
    // kept when it already is a document or when no owner document can
    // be resolved.
    var start: *Node = ctx;
    if (path.absolute and ctx._type != .document) {
        if (ctx.ownerDocument(self.frame)) |owner| {
            start = owner.asNode();
        }
    }

    // Seed the working set with the single start node.
    const seed = try self.arena.alloc(*Node, 1);
    seed[0] = start;

    var nodes: []const *Node = seed;
    for (path.steps) |step| {
        const stepped = try self.evalStep(nodes, step);
        nodes = stepped.node_set;
    }
    return .{ .node_set = nodes };
}
|
||||
|
||||
// Recognize the very common `//tag[@id='x']` and `.//tag[@id='x']`
// shapes (and their wildcard `//*[@id='x']` variants) and serve them
// directly from `frame.getElementByIdFromNode`. Accepts the literal on
// either side of `=`.
//
// Mirrors the same tradeoff `webapi/selector/List.zig:optimizeSelector`
// already makes for `querySelector(All)`: the id-map only stores the
// first element per ID in document order, so duplicate IDs (invalid
// HTML, but possible) yield one match here where a strict tree walk
// would find all. Acceptable because Capybara/Selenium hot paths
// assume unique IDs and CSS has shipped this compromise for years.
//
// Returns null (meaning "use the general path") for any deviation: extra
// steps, extra predicates, non-eq predicate, non-literal RHS, the inability
// to resolve a search root, or an id hit that is the search root itself.
// Returns an empty node-set when the id is unknown, lies outside the search
// root, or belongs to an element with a different tag.
fn tryIdLookupFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
    // Two acceptable AST shapes:
    //   //tag[@id='x']   parses to: ds::node() / child::tag[pred]
    //   .//tag[@id='x']  parses to: self::node() / ds::node() / child::tag[pred]
    const target = matchDescendantPathShape(path) orelse return null;

    if (target.axis != .child) return null;
    if (target.predicates.len != 1) return null;

    // Tag name (null = wildcard "*"). type_test (e.g. `node()`, `text()`)
    // doesn't qualify because getElementByIdFromNode only returns elements.
    const tag_name: ?[]const u8 = switch (target.node_test) {
        .name => |n| if (std.mem.eql(u8, n, "*")) null else n,
        .type_test => return null,
    };

    const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null;

    // Resolve search root the same way the general path does, except that an
    // unresolvable owner document abandons the fast path.
    const search_root: *Node = if (path.absolute) blk: {
        if (ctx._type == .document) break :blk ctx;
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        break :blk owner.asNode();
    } else ctx;

    const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse {
        return .{ .node_set = &.{} };
    };
    const id_node = id_element.asNode();

    // `.//tag` expands to descendant-or-self::node()/child::tag, which only
    // reaches nodes strictly below the context. If the id lookup handed back
    // the search root itself, that node is not a valid result, and a
    // descendant carrying a duplicate id still could be. Let the general
    // tree walk decide instead of returning the context node.
    if (search_root == id_node) return null;

    // Relative paths must filter to descendants of the context.
    // getElementByIdFromNode is doc-wide.
    if (!search_root.contains(id_node)) {
        return .{ .node_set = &.{} };
    }

    // Tag check (case-insensitive per decision #2). Element tag names
    // are stored lowercase via `getTagNameLower`; lowercase the AST
    // name once and compare.
    if (tag_name) |tag| {
        const lowered = try std.ascii.allocLowerString(self.arena, tag);
        if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) {
            return .{ .node_set = &.{} };
        }
    }

    const out = try self.arena.alloc(*Node, 1);
    out[0] = id_node;
    return .{ .node_set = out };
}
|
||||
|
||||
// Generalization of `tryIdLookupFastPath` to non-ID predicates. Handles the
// same AST shape (`//<test>[preds]` / `.//<test>[preds]`) but, rather than
// consulting the id-map, performs one document-order walk over the
// descendants of the search root and applies the node test plus every
// predicate inline (see `fusedDescend`).
//
// Typical expressions served here: `//div`, `//*`, `//*[@class='x']`,
// `//div[@class='x']`, `//div[contains(@class,'x')]`.
//
// The fast path is only taken when every predicate passes
// `isSafeNonPositionalPredicate`: the fused walk does not track
// `position()` / `last()`, so numeric predicates and anything mentioning
// those functions are left to the general path. Returns null whenever the
// fast path does not apply, including when an absolute path cannot resolve
// an owner document.
fn tryFusedDescendantFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
    const target = matchDescendantPathShape(path) orelse return null;
    if (target.axis != .child) return null;

    for (target.predicates) |pred| {
        if (!isSafeNonPositionalPredicate(pred)) return null;
    }

    // Lowercase the name test once; null stands for `*` and for type tests.
    var lowered_name: ?[]const u8 = null;
    switch (target.node_test) {
        .name => |n| {
            if (!std.mem.eql(u8, n, "*")) {
                lowered_name = try std.ascii.allocLowerString(self.arena, n);
            }
        },
        .type_test => {},
    }

    var search_root: *Node = ctx;
    if (path.absolute and ctx._type != .document) {
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        search_root = owner.asNode();
    }

    var matches: std.ArrayList(*Node) = .empty;
    try self.fusedDescend(search_root, target, lowered_name, &matches);
    return .{ .node_set = matches.items };
}
|
||||
|
||||
// Recursive pre-order walk used by `tryFusedDescendantFastPath`.
//
// For every child of `parent` (and, recursively, every deeper descendant) the
// node test of `target` is applied; a node that passes the test and all of
// `target.predicates` is appended to `out`. Predicates are evaluated with a
// synthetic position/size of 1/1, which is only sound because the caller has
// already rejected predicates that depend on either value.
fn fusedDescend(
    self: *Evaluator,
    parent: *Node,
    target: ast.Step,
    lowered_name: ?[]const u8,
    out: *std.ArrayList(*Node),
) Error!void {
    var children = parent.childrenIterator();
    while (children.next()) |child| {
        if (matchTest(child, target.node_test, target.axis, lowered_name)) {
            // Stop at the first predicate that evaluates to false.
            const keep = for (target.predicates) |pred| {
                const val = try self.evalExpr(pred, child, 1, 1);
                if (!result.toBoolean(val)) break false;
            } else true;
            if (keep) try out.append(self.arena, child);
        }
        // Descend regardless of whether `child` itself matched.
        try self.fusedDescend(child, target, lowered_name, out);
    }
}
|
||||
|
||||
// Recognizes the two step layouts produced by `//X` and `.//X`:
//   2 steps: descendant-or-self::node() / X
//   3 steps: self::node() / descendant-or-self::node() / X
// Returns the final step `X` on a match and null for every other layout.
fn matchDescendantPathShape(path: ast.Path) ?ast.Step {
    const steps = path.steps;
    if (steps.len == 2) {
        return if (isDescendantOrSelfNode(steps[0])) steps[1] else null;
    }
    if (steps.len == 3) {
        if (isSelfNode(steps[0]) and isDescendantOrSelfNode(steps[1])) return steps[2];
    }
    return null;
}
|
||||
|
||||
// A predicate is safe for the fused descendant walk when it is neither
// numeric at the top level nor mentions `position()` / `last()` anywhere.
fn isSafeNonPositionalPredicate(expr: *const ast.Expr) bool {
    return !isNumericTopLevel(expr) and !containsPositionOrLast(expr);
}
|
||||
|
||||
// Conservative static check: may this predicate expression evaluate to a
// number? A numeric predicate value is a positional test (`predicateMatches`
// compares it against the node's position), which the fused descendant walk
// cannot honour because it evaluates predicates with a synthetic position.
//
// Returns true for number literals, unary minus, arithmetic binops and
// function calls whose name is listed in `isNumericFnName`. It also looks
// through `.filter` / `.filter_path`, because `evalFilter` and
// `evalFilterPath` return a non-node-set base unchanged, so `(2)[true()]`
// still yields the number 2. A variable reference is treated as possibly
// numeric since its type is not known here.
// NOTE(review): how `.var_ref` is evaluated is not visible in this chunk.
//
// A false positive only costs the fast path; the general path stays correct.
fn isNumericTopLevel(expr: *const ast.Expr) bool {
    return switch (expr.*) {
        .number, .neg => true,
        .binop => |bo| switch (bo.op) {
            .add, .sub, .mul, .div, .mod => true,
            else => false,
        },
        .fn_call => |fc| isNumericFnName(fc.name),
        // The filter's value is its base value whenever the base is a scalar.
        .filter => |f| isNumericTopLevel(f.expr),
        .filter_path => |fp| isNumericTopLevel(fp.filter),
        // Type unknown until evaluation time; assume it could be a number.
        .var_ref => true,
        else => false,
    };
}
|
||||
|
||||
// Reports whether `name` is one of the functions this evaluator treats as
// number-returning for the purpose of the fast-path safety gate.
fn isNumericFnName(name: []const u8) bool {
    inline for (.{
        "position",
        "last",
        "count",
        "sum",
        "floor",
        "ceiling",
        "round",
        "number",
        "string-length",
    }) |candidate| {
        if (std.mem.eql(u8, name, candidate)) return true;
    }
    return false;
}
|
||||
|
||||
// Walks the expression tree and reports whether a call to `position()` or
// `last()` appears anywhere in it, including inside predicates of nested
// paths and inside function arguments.
fn containsPositionOrLast(expr: *const ast.Expr) bool {
    switch (expr.*) {
        .number, .literal, .var_ref => return false,
        .neg => |operand| return containsPositionOrLast(operand),
        .binop => |bo| {
            if (containsPositionOrLast(bo.left)) return true;
            return containsPositionOrLast(bo.right);
        },
        .filter => |f| {
            if (containsPositionOrLast(f.expr)) return true;
            return containsPositionOrLast(f.predicate);
        },
        .filter_path => |fp| {
            if (containsPositionOrLast(fp.filter)) return true;
            return stepsContainPositionOrLast(fp.steps);
        },
        .path => |p| return stepsContainPositionOrLast(p.steps),
        .fn_call => |fc| {
            if (std.mem.eql(u8, fc.name, "position")) return true;
            if (std.mem.eql(u8, fc.name, "last")) return true;
            return argsContainPositionOrLast(fc.args);
        },
    }
}
|
||||
|
||||
// True when any predicate of any step mentions `position()` or `last()`.
fn stepsContainPositionOrLast(steps: []const ast.Step) bool {
    var step_idx: usize = 0;
    while (step_idx < steps.len) : (step_idx += 1) {
        for (steps[step_idx].predicates) |pred| {
            if (containsPositionOrLast(pred)) return true;
        }
    }
    return false;
}
|
||||
|
||||
// True when any function-call argument mentions `position()` or `last()`.
fn argsContainPositionOrLast(args: []const *ast.Expr) bool {
    var i: usize = 0;
    while (i < args.len) : (i += 1) {
        if (containsPositionOrLast(args[i])) return true;
    }
    return false;
}
|
||||
|
||||
// Matches exactly the step `descendant-or-self::node()` with no predicates,
// i.e. the expansion of the `//` abbreviation.
fn isDescendantOrSelfNode(s: ast.Step) bool {
    if (s.axis != .descendant_or_self or s.predicates.len != 0) return false;
    switch (s.node_test) {
        .type_test => |kind| return kind == .node,
        .name => return false,
    }
}
|
||||
|
||||
// Matches exactly the step `self::node()` with no predicates, i.e. the
// expansion of the `.` abbreviation.
fn isSelfNode(s: ast.Step) bool {
    if (s.axis != .self or s.predicates.len != 0) return false;
    switch (s.node_test) {
        .type_test => |kind| return kind == .node,
        .name => return false,
    }
}
|
||||
|
||||
// Recognizes `@attr_name = 'literal'` (operands in either order) and returns
// the literal's text. Any other expression yields null. When both operand
// orders could apply, the attribute-on-the-left reading is checked first.
fn matchAttrEqLiteral(expr: *const ast.Expr, attr_name: []const u8) ?[]const u8 {
    const bo = switch (expr.*) {
        .binop => |b| b,
        else => return null,
    };
    if (bo.op != .eq) return null;

    if (isAttrPath(bo.left, attr_name)) {
        switch (bo.right.*) {
            .literal => |text| return text,
            else => {},
        }
    }
    if (isAttrPath(bo.right, attr_name)) {
        switch (bo.left.*) {
            .literal => |text| return text,
            else => {},
        }
    }
    return null;
}
|
||||
|
||||
// True when `expr` is the relative, single-step, predicate-free path
// `attribute::<attr_name>` (the `@attr_name` abbreviation). The name is
// compared byte-for-byte.
fn isAttrPath(expr: *const ast.Expr, attr_name: []const u8) bool {
    const p = switch (expr.*) {
        .path => |path| path,
        else => return false,
    };
    if (p.absolute or p.steps.len != 1) return false;

    const only = p.steps[0];
    if (only.axis != .attribute or only.predicates.len != 0) return false;

    switch (only.node_test) {
        .name => |n| return std.mem.eql(u8, n, attr_name),
        .type_test => return false,
    }
}
|
||||
|
||||
// Evaluates `FilterExpr/steps...`: the filter expression is evaluated first
// and, if it produced a node-set, each step is applied in turn to the running
// node-set. A non-node-set filter result is returned unchanged and the steps
// are not evaluated.
fn evalFilterPath(self: *Evaluator, fp: ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const base = try self.evalExpr(fp.filter, ctx, pos, size);

    var nodes: []const *Node = switch (base) {
        .node_set => |set| set,
        else => return base,
    };
    for (fp.steps) |step| {
        nodes = (try self.evalStep(nodes, step)).node_set;
    }
    return .{ .node_set = nodes };
}
|
||||
|
||||
// Evaluates a filter expression `Expr[predicate]`.
//
// A base value that is not a node-set is returned unchanged. Otherwise the
// predicate is evaluated once per node with `position()` set to the node's
// 1-based index and `last()` set to the size of the set, and the nodes
// accepted by `predicateMatches` are returned.
//
// XPath 1.0 §3.3 says a filter-expression predicate filters the node-set
// "with respect to the child axis", so positions must count in document
// order. The base set can arrive in a different order: reverse axes are
// produced in proximity order and `evalStep` concatenates per-context
// results. A copy of the set is therefore sorted into document order before
// positions are assigned; otherwise `(preceding-sibling::*)[1]` would pick
// the nearest sibling instead of the first one in the document.
fn evalFilter(self: *Evaluator, f: ast.Filter, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const base = try self.evalExpr(f.expr, ctx, pos, size);
    if (base != .node_set) return base;

    // Sort a private copy; the base slice is const and may be shared.
    const ordered = try self.arena.dupe(*Node, base.node_set);
    sortDocOrder(ordered);

    var out: std.ArrayList(*Node) = .empty;
    const sz = ordered.len;
    for (ordered, 1..) |n, k| {
        const val = try self.evalExpr(f.predicate, n, k, sz);
        if (predicateMatches(val, k)) try out.append(self.arena, n);
    }
    return .{ .node_set = out.items };
}
|
||||
|
||||
// ----- step + axis -----
|
||||
|
||||
// Applies one location step to every node of `ctx_nodes`.
//
// Per context node: collect the axis nodes, keep those passing the node
// test, then run the step's predicates left to right. Each predicate sees
// the survivors of the previous one, with `position()` being the 1-based
// index in that list and `last()` its length. Results of all context nodes
// are merged into an insertion-ordered set so that a node reached from
// several contexts appears only once.
fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: ast.Step) Error!result.Result {
    // Lowercase the name test once per step so matchNameTest can use a plain
    // byte comparison for every candidate node (decision #2: names match
    // case-insensitively). Null stands for `*` and for type tests.
    var lowered_name: ?[]const u8 = null;
    switch (step.node_test) {
        .name => |n| {
            if (!std.mem.eql(u8, n, "*")) {
                lowered_name = try std.ascii.allocLowerString(self.arena, n);
            }
        },
        .type_test => {},
    }

    var unique: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;

    for (ctx_nodes) |ctx| {
        var tested: std.ArrayList(*Node) = .empty;
        for (try self.axisNodes(ctx, step.axis)) |candidate| {
            if (matchTest(candidate, step.node_test, step.axis, lowered_name)) {
                try tested.append(self.arena, candidate);
            }
        }

        var stage: []const *Node = tested.items;
        for (step.predicates) |pred| {
            var kept: std.ArrayList(*Node) = .empty;
            const total = stage.len;
            for (stage, 1..) |candidate, position| {
                const val = try self.evalExpr(pred, candidate, position, total);
                if (predicateMatches(val, position)) try kept.append(self.arena, candidate);
            }
            stage = kept.items;
        }

        for (stage) |survivor| try unique.put(self.arena, survivor, {});
    }

    return .{ .node_set = unique.keys() };
}
|
||||
|
||||
// Materializes the nodes on `axis` relative to `node`.
//
// Forward axes are produced in document order. Reverse axes (ancestor,
// ancestor-or-self, preceding-sibling, preceding) are produced in proximity
// order, nearest first, which is the order predicates on those axes count
// positions in; the final node-set is sorted to document order at the public
// boundary. The namespace axis and unknown axes yield nothing (decision #3
// stubs).
fn axisNodes(self: *Evaluator, node: *Node, axis: ast.Axis) Error![]const *Node {
    var out: std.ArrayList(*Node) = .empty;
    switch (axis) {
        .self => try out.append(self.arena, node),
        .child => {
            var children = node.childrenIterator();
            while (children.next()) |child| try out.append(self.arena, child);
        },
        .parent => {
            if (node.parentNode()) |parent| try out.append(self.arena, parent);
        },
        .descendant, .descendant_or_self => {
            if (axis == .descendant_or_self) try out.append(self.arena, node);
            try self.appendDescendants(node, &out);
        },
        .ancestor, .ancestor_or_self => {
            if (axis == .ancestor_or_self) try out.append(self.arena, node);
            var up = node.parentNode();
            while (up) |ancestor| : (up = ancestor.parentNode()) {
                try out.append(self.arena, ancestor);
            }
        },
        .following_sibling => {
            var sib = node.nextSibling();
            while (sib) |next| : (sib = next.nextSibling()) {
                try out.append(self.arena, next);
            }
        },
        .preceding_sibling => {
            var sib = node.previousSibling();
            while (sib) |prev| : (sib = prev.previousSibling()) {
                try out.append(self.arena, prev);
            }
        },
        .following => try self.appendFollowing(node, &out),
        .preceding => try self.appendPreceding(node, &out),
        .attribute => try self.appendAttributes(node, &out),
        .namespace, .unknown => {}, // decision #3 stubs
    }
    return out.items;
}
|
||||
|
||||
// Appends every descendant of `node` to `out` in pre-order: each child is
// appended before its own subtree. `node` itself is not appended.
fn appendDescendants(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
    var children = node.childrenIterator();
    while (children.next()) |child| {
        try out.append(self.arena, child);
        try self.appendDescendants(child, out);
    }
}
|
||||
|
||||
// Appends the `following` axis of `start`: for `start` and then each of its
// ancestors in turn, every later sibling followed by that sibling's subtree.
// Descendants of `start` are not included.
fn appendFollowing(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
    var cursor: *Node = start;
    while (true) {
        var sib = cursor.nextSibling();
        while (sib) |next| {
            try out.append(self.arena, next);
            try self.appendDescendants(next, out);
            sib = next.nextSibling();
        }
        cursor = cursor.parentNode() orelse break;
    }
}
|
||||
|
||||
// Appends the subtree rooted at `n` in reverse document order: children are
// visited from last to first, each one's subtree recursively, and `n` itself
// is appended last.
fn appendPrecedingSubtree(self: *Evaluator, n: *Node, out: *std.ArrayList(*Node)) Error!void {
    var cursor = n.lastChild();
    while (cursor) |child| {
        try self.appendPrecedingSubtree(child, out);
        cursor = child.previousSibling();
    }
    try out.append(self.arena, n);
}
|
||||
|
||||
// Appends the `preceding` axis of `start` in reverse document order: for
// `start` and then each ancestor that has a parent, the subtrees of its
// earlier siblings, nearest sibling first. Ancestors themselves are never
// appended; the climb stops at the first node without a parent.
fn appendPreceding(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
    var cursor: *Node = start;
    while (cursor.parentNode()) |parent| : (cursor = parent) {
        var sib = cursor.previousSibling();
        while (sib) |prev| : (sib = prev.previousSibling()) {
            try self.appendPrecedingSubtree(prev, out);
        }
    }
}
|
||||
|
||||
// Appends the attribute nodes of `node` to `out`; does nothing when `node`
// is not an Element.
//
// Attribute objects are memoized in `frame._attribute_lookup`, keyed by the
// address of the attribute entry, so repeated XPath queries
// (Capybara/Selenium polling) reuse the same *Attribute instead of leaking
// fresh ones into page-lifetime storage on every call.
//
// The lookup is done as get → create → put rather than `getOrPut`, because
// `getOrPut` inserts the key before the value exists: if `toAttribute` then
// failed, the map would keep an entry whose value was never initialized and
// a later query would read it as a valid pointer.
fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
    const el = node.is(Element) orelse return;
    var it = el.attributeIterator();
    while (it.next()) |entry| {
        const key = @intFromPtr(entry);
        const attr = self.frame._attribute_lookup.get(key) orelse blk: {
            const created = try entry.toAttribute(el, self.frame);
            // Publish only a fully constructed attribute.
            try self.frame._attribute_lookup.put(self.frame.arena, key, created);
            break :blk created;
        };
        try out.append(self.arena, attr._proto);
    }
}
|
||||
|
||||
// ----- node test matching -----
|
||||
|
||||
// Applies a node test to `node`. Name tests are delegated to
// `matchNameTest`; type tests compare the DOM node type.
fn matchTest(node: *Node, test_: ast.NodeTest, axis: ast.Axis, lowered_name: ?[]const u8) bool {
    switch (test_) {
        .name => |name| return matchNameTest(node, name, axis, lowered_name),
        .type_test => |kind| {
            // `node()` accepts everything without inspecting the node.
            if (kind == .node) return true;
            const node_type = node.getNodeType();
            return switch (kind) {
                .node => unreachable, // handled above
                // XPath 1.0 §5.7: the data model has no CDATASection node;
                // CDATA content is part of the text node value. Match both
                // Text (3) and CDATASection (4) DOM node types.
                .text => node_type == 3 or node_type == 4,
                .comment => node_type == 8,
                .processing_instruction => node_type == 7,
            };
        },
    }
}
|
||||
|
||||
// Applies a name test (`tag`, `@attr` or `*`) to `node`.
//
// On the attribute axis only attribute nodes can match; on every other axis
// only elements can. `lowered_name` is non-null iff `name != "*"`. Element
// tag names (`getTagNameLower`) and html5ever-stored attribute names are
// already lowercase, so a plain byte comparison against the pre-lowered test
// name is enough.
fn matchNameTest(node: *Node, name: []const u8, axis: ast.Axis, lowered_name: ?[]const u8) bool {
    const wildcard = std.mem.eql(u8, name, "*");

    if (axis == .attribute) {
        switch (node._type) {
            .attribute => |attr| return wildcard or std.mem.eql(u8, attr._name.str(), lowered_name.?),
            else => return false,
        }
    }

    if (node.is(Element)) |el| {
        return wildcard or std.mem.eql(u8, el.getTagNameLower(), lowered_name.?);
    }
    return false;
}
|
||||
|
||||
// ----- binop -----
|
||||
|
||||
// Evaluates a binary operator.
//
// `or` / `and` short-circuit: the right operand is evaluated only when the
// left one does not already decide the result. Every other operator
// evaluates the left operand, then the right one, and then:
//   - comparisons defer to `xCmp`;
//   - arithmetic converts both operands to numbers and computes in f64;
//   - `|` requires two node-sets (error.UnionRequiresNodeSets otherwise) and
//     returns their de-duplicated union sorted in document order.
fn evalBinop(self: *Evaluator, bo: ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    if (bo.op == .or_ or bo.op == .and_) {
        // `or` is decided by a true left operand, `and` by a false one.
        const decisive = (bo.op == .or_);
        const first = try self.evalExpr(bo.left, ctx, pos, size);
        if (result.toBoolean(first) == decisive) return .{ .boolean = decisive };
        const second = try self.evalExpr(bo.right, ctx, pos, size);
        return .{ .boolean = result.toBoolean(second) };
    }

    const lhs = try self.evalExpr(bo.left, ctx, pos, size);
    const rhs = try self.evalExpr(bo.right, ctx, pos, size);

    switch (bo.op) {
        .or_, .and_ => unreachable, // handled above
        .eq, .neq, .lt, .gt, .lte, .gte => {
            return .{ .boolean = try self.xCmp(lhs, rhs, bo.op) };
        },
        .add, .sub, .mul, .div, .mod => {
            const a = try result.toNumber(self.arena, lhs);
            const b = try result.toNumber(self.arena, rhs);
            const value: f64 = switch (bo.op) {
                .add => a + b,
                .sub => a - b,
                .mul => a * b,
                .div => a / b,
                // JS `%` and Zig `@rem` agree on sign for finite values
                // and propagate NaN (XPath §3.5).
                .mod => @rem(a, b),
                else => unreachable,
            };
            return .{ .number = value };
        },
        .union_ => {
            if (lhs != .node_set or rhs != .node_set) return error.UnionRequiresNodeSets;
            var merged: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
            for (lhs.node_set) |n| try merged.put(self.arena, n, {});
            for (rhs.node_set) |n| try merged.put(self.arena, n, {});
            const nodes = merged.keys();
            sortDocOrder(@constCast(nodes));
            return .{ .node_set = nodes };
        },
    }
}
|
||||
|
||||
// ----- comparison (XPath spec §3.4) -----
|
||||
|
||||
// Implements the comparison rules of XPath 1.0 §3.4 for `=`, `!=`, `<`, `>`,
// `<=` and `>=`.
//
//   - node-set vs node-set: true if some pair of nodes compares true, on
//     string-values for `=` / `!=` and on their numeric conversion otherwise.
//   - node-set vs boolean: the node-set is converted to a boolean (non-empty
//     = true). `=` / `!=` compare the booleans; relational operators compare
//     them as numbers (false = 0, true = 1).
//   - node-set vs number / string: true if some node compares true against
//     the scalar. Strings compare textually for `=` / `!=`, numerically
//     otherwise.
//   - no node-set: for `=` / `!=`, boolean wins over number, which wins over
//     string; relational operators convert both sides to numbers.
//
// Operand order is preserved throughout, so `5 < //x` and `//x < 5` are not
// confused.
fn xCmp(self: *Evaluator, left: result.Result, right: result.Result, op: ast.BinOpKind) Error!bool {
    const is_eq = (op == .eq or op == .neq);
    const l_is_set = (left == .node_set);
    const r_is_set = (right == .node_set);

    if (l_is_set and r_is_set) {
        // Cache right-side string-values once. Without this, each left node
        // would pay |right| allocations: O(N×M) for a set×set comparison
        // (e.g. `//foo = //bar` on a large page).
        const right_strings = try self.arena.alloc([]const u8, right.node_set.len);
        for (right.node_set, 0..) |r, i| {
            right_strings[i] = try result.stringValueOf(self.arena, r);
        }
        for (left.node_set) |l| {
            const lv = try result.stringValueOf(self.arena, l);
            for (right_strings) |rv| {
                const matched = if (is_eq)
                    cmpString(lv, rv, op)
                else
                    cmpNumber(result.stringToNumber(lv), result.stringToNumber(rv), op);
                if (matched) return true;
            }
        }
        return false;
    }

    if (l_is_set or r_is_set) {
        const ns = if (l_is_set) left.node_set else right.node_set;
        const other = if (l_is_set) right else left;
        const ns_left = l_is_set;

        if (other == .boolean) {
            const ns_b = ns.len > 0;
            const a, const b = if (ns_left) .{ ns_b, other.boolean } else .{ other.boolean, ns_b };
            if (is_eq) return cmpBool(a, b, op);
            // cmpBool only understands `=` / `!=`. Relational operators on
            // two booleans compare their numeric values (§3.4), so e.g.
            // `//a < true()` must not reach cmpBool's `unreachable`.
            const an: f64 = if (a) 1 else 0;
            const bn: f64 = if (b) 1 else 0;
            return cmpNumber(an, bn, op);
        }

        for (ns) |n| {
            const sv = try result.stringValueOf(self.arena, n);
            const matched = switch (other) {
                .number => |num| blk: {
                    const sv_num = result.stringToNumber(sv);
                    const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num };
                    break :blk cmpNumber(a, b, op);
                },
                .string => |s| blk: {
                    if (is_eq) {
                        const a, const b = if (ns_left) .{ sv, s } else .{ s, sv };
                        break :blk cmpString(a, b, op);
                    }
                    const sv_num = result.stringToNumber(sv);
                    const s_num = result.stringToNumber(s);
                    const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num };
                    break :blk cmpNumber(a, b, op);
                },
                .boolean, .node_set => unreachable, // handled above
            };
            if (matched) return true;
        }
        return false;
    }

    // Neither is a node-set.
    if (is_eq) {
        if (left == .boolean or right == .boolean) {
            return cmpBool(result.toBoolean(left), result.toBoolean(right), op);
        }
        if (left == .number or right == .number) {
            const ln = try result.toNumber(self.arena, left);
            const rn = try result.toNumber(self.arena, right);
            return cmpNumber(ln, rn, op);
        }
        const ls = try result.toString(self.arena, left);
        const rs = try result.toString(self.arena, right);
        return cmpString(ls, rs, op);
    }
    // Non-eq with no node-set: both → number.
    const ln = try result.toNumber(self.arena, left);
    const rn = try result.toNumber(self.arena, right);
    return cmpNumber(ln, rn, op);
}
|
||||
|
||||
// Byte-wise string (in)equality. Only `.eq` and `.neq` are valid here;
// relational operators always coerce to numbers before comparing.
fn cmpString(a: []const u8, b: []const u8, op: ast.BinOpKind) bool {
    const same = std.mem.eql(u8, a, b);
    switch (op) {
        .eq => return same,
        .neq => return !same,
        else => unreachable, // <, > etc. always coerce to number first
    }
}
|
||||
|
||||
// Numeric comparison for the six comparison operators. Relies on native f64
// comparison for NaN: `NaN == x` is false, `NaN != x` is true, and every
// ordering comparison involving NaN is false. Any non-comparison operator is
// a caller bug.
fn cmpNumber(a: f64, b: f64, op: ast.BinOpKind) bool {
    switch (op) {
        .eq => return a == b,
        .neq => return a != b,
        .lt => return a < b,
        .gt => return a > b,
        .lte => return a <= b,
        .gte => return a >= b,
        else => unreachable,
    }
}
|
||||
|
||||
// Boolean (in)equality. Only `.eq` and `.neq` are valid; any other operator
// is a caller bug.
fn cmpBool(a: bool, b: bool, op: ast.BinOpKind) bool {
    const same = (a == b);
    switch (op) {
        .eq => return same,
        .neq => return !same,
        else => unreachable,
    }
}
|
||||
|
||||
// ----- function calls -----
|
||||
|
||||
// Evaluates a function call.
//
// `position()` and `last()` are answered here because they need the
// (pos, size) pair of the current predicate context, which `functions.call`
// never sees. For every other function all arguments are evaluated eagerly,
// left to right, and handed to `functions.call`. Eager evaluation is fine
// because the short-circuiting `or` / `and` are binops, not function calls.
fn evalFnCall(self: *Evaluator, fc: ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    if (std.mem.eql(u8, fc.name, "position")) {
        return .{ .number = @floatFromInt(pos) };
    }
    if (std.mem.eql(u8, fc.name, "last")) {
        return .{ .number = @floatFromInt(size) };
    }

    const values = try self.arena.alloc(result.Result, fc.args.len);
    for (values, fc.args) |*slot, arg_expr| {
        slot.* = try self.evalExpr(arg_expr, ctx, pos, size);
    }

    return functions.call(self.arena, fc.name, values, ctx, self.frame);
}
|
||||
|
||||
// ----- helpers -----
|
||||
|
||||
// Decides whether a node passes a predicate given the predicate's value.
// A numeric value selects only the node whose 1-based position equals it
// (so non-integer numbers never match); every other value is converted to a
// boolean.
fn predicateMatches(val: result.Result, position: usize) bool {
    if (val == .number) {
        const wanted: f64 = @floatFromInt(position);
        return val.number == wanted;
    }
    return result.toBoolean(val);
}
|
||||
|
||||
// Sorts `nodes` in place into document order using `lessThanDocOrder`.
// Slices with fewer than two nodes are left untouched.
pub fn sortDocOrder(nodes: []*Node) void {
    if (nodes.len > 1) {
        std.mem.sort(*Node, nodes, {}, lessThanDocOrder);
    }
}
|
||||
|
||||
// Sort predicate for document order: `a` sorts before `b` when
// `a.compareDocumentPosition(b)` has the FOLLOWING bit (0x04) set, i.e. `b`
// comes after `a`. A node never sorts before itself.
fn lessThanDocOrder(_: void, a: *Node, b: *Node) bool {
    const following_bit = 0x04;
    return a != b and (a.compareDocumentPosition(b) & following_bit) != 0;
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Tests — pure-logic only. DOM-dependent evaluation lands as HTML
|
||||
// fixtures in Phase 9 (tests/xpath/*.html); Lightpanda has no in-Zig
|
||||
// way to construct a Frame + Document tree without the JS runtime.
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const testing = std.testing;
|
||||
const Tokenizer = @import("Tokenizer.zig");
|
||||
|
||||
test "Evaluator: cmpNumber NaN semantics" {
    const nan = std.math.nan(f64);

    // NaN is unequal to everything, itself included.
    try testing.expect(!cmpNumber(nan, nan, .eq));
    try testing.expect(cmpNumber(nan, nan, .neq));

    // Every ordering comparison involving NaN is false.
    inline for (.{ .lt, .gt, .lte, .gte }) |op| {
        try testing.expect(!cmpNumber(nan, 0, op));
    }

    // Ordinary finite values.
    try testing.expect(cmpNumber(0, 0, .eq));
    try testing.expect(cmpNumber(1, 2, .lt));
    try testing.expect(cmpNumber(2, 1, .gt));
    try testing.expect(cmpNumber(1, 1, .lte));
    try testing.expect(cmpNumber(1, 1, .gte));
}
|
||||
|
||||
test "Evaluator: cmpString" {
    const Case = struct { a: []const u8, b: []const u8, op: ast.BinOpKind, want: bool };
    const cases = [_]Case{
        .{ .a = "a", .b = "a", .op = .eq, .want = true },
        .{ .a = "a", .b = "b", .op = .eq, .want = false },
        .{ .a = "a", .b = "b", .op = .neq, .want = true },
        .{ .a = "a", .b = "a", .op = .neq, .want = false },
    };
    for (cases) |c| {
        try testing.expectEqual(c.want, cmpString(c.a, c.b, c.op));
    }
}
|
||||
|
||||
test "Evaluator: cmpBool" {
    try testing.expectEqual(true, cmpBool(true, true, .eq));
    try testing.expectEqual(false, cmpBool(true, false, .eq));
    try testing.expectEqual(true, cmpBool(true, false, .neq));
}
|
||||
|
||||
test "Evaluator: predicateMatches numeric vs boolean" {
    const Case = struct { val: result.Result, position: usize, want: bool };
    const cases = [_]Case{
        // Numbers select by position.
        .{ .val = .{ .number = 1 }, .position = 1, .want = true },
        .{ .val = .{ .number = 2 }, .position = 1, .want = false },
        // Non-integer never matches.
        .{ .val = .{ .number = 1.5 }, .position = 1, .want = false },
        // Boolean: any truthy value passes regardless of position.
        .{ .val = .{ .boolean = true }, .position = 7, .want = true },
        .{ .val = .{ .boolean = false }, .position = 1, .want = false },
        // String: nonempty truthy.
        .{ .val = .{ .string = "x" }, .position = 99, .want = true },
        .{ .val = .{ .string = "" }, .position = 1, .want = false },
        // Empty node-set: falsy.
        .{ .val = .{ .node_set = &.{} }, .position = 1, .want = false },
    };
    for (cases) |c| {
        try testing.expectEqual(c.want, predicateMatches(c.val, c.position));
    }
}
|
||||
|
||||
test "Evaluator: scalar arithmetic via parsed expressions" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    inline for (.{
        .{ .src = "1 + 2", .want = 3 },
        .{ .src = "5 - 3", .want = 2 },
        .{ .src = "4 * 2", .want = 8 },
        .{ .src = "10 div 4", .want = 2.5 },
        .{ .src = "10 mod 3", .want = 1 },
        .{ .src = "-5", .want = -5 },
        .{ .src = "1 + 2 * 3", .want = 7 },
    }) |case| {
        const parsed = try Parser.parse(alloc, case.src);
        // Pure arithmetic never touches the Frame or the context node, so
        // fabricated pointers are enough to drive the number / binop paths
        // without a real DOM. Any expression that dereferenced them would
        // crash; none of the inputs above does.
        var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
        const fake_ctx: *Node = @ptrFromInt(0x2000);
        const got = try evaluator.evalExpr(parsed, fake_ctx, 1, 1);
        try testing.expect(got == .number);
        try testing.expectEqual(@as(f64, case.want), got.number);
    }
}
|
||||
|
||||
test "Evaluator: scalar comparison via parsed expressions" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    inline for (.{
        .{ .src = "1 = 1", .want = true },
        .{ .src = "1 = 2", .want = false },
        .{ .src = "1 != 2", .want = true },
        .{ .src = "1 < 2", .want = true },
        .{ .src = "2 < 1", .want = false },
        .{ .src = "1 <= 1", .want = true },
        .{ .src = "2 >= 2", .want = true },
        .{ .src = "'abc' = 'abc'", .want = true },
        .{ .src = "'abc' != 'abd'", .want = true },
    }) |case| {
        const parsed = try Parser.parse(alloc, case.src);
        // Scalar-only expressions: the fabricated frame / context pointers
        // are never dereferenced.
        var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
        const fake_ctx: *Node = @ptrFromInt(0x2000);
        const got = try evaluator.evalExpr(parsed, fake_ctx, 1, 1);
        try testing.expect(got == .boolean);
        try testing.expectEqual(case.want, got.boolean);
    }
}
|
||||
|
||||
test "Evaluator: position() and last() reflect context" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();
    const fake_ctx: *Node = @ptrFromInt(0x2000);

    // Evaluated at position 3 of 5.
    inline for (.{
        .{ .src = "position()", .want = 3 },
        .{ .src = "last()", .want = 5 },
    }) |case| {
        const parsed = try Parser.parse(alloc, case.src);
        var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
        const got = try evaluator.evalExpr(parsed, fake_ctx, 3, 5);
        try testing.expectEqual(@as(f64, case.want), got.number);
    }

    {
        // Logical short-circuit: last() never evaluates if first
        // operand is true.
        const parsed = try Parser.parse(alloc, "1 = 1 or last() > 0");
        var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
        const got = try evaluator.evalExpr(parsed, fake_ctx, 1, 1);
        try testing.expect(got.boolean);
    }
}
|
||||
|
||||
test "Evaluator: short-circuit and/or" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();
    const fake_ctx: *Node = @ptrFromInt(0x2000);

    inline for (.{
        .{ .src = "1 = 2 or 1 = 1", .want = true },
        .{ .src = "1 = 1 and 1 = 2", .want = false },
        .{ .src = "1 = 1 and 2 = 2", .want = true },
        .{ .src = "1 = 2 and 1 = 1", .want = false },
        .{ .src = "1 = 2 or 2 = 1", .want = false },
    }) |case| {
        const parsed = try Parser.parse(alloc, case.src);
        var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
        const got = try evaluator.evalExpr(parsed, fake_ctx, 1, 1);
        try testing.expect(got == .boolean);
        try testing.expectEqual(case.want, got.boolean);
    }
}
|
||||
|
||||
test "Evaluator: unary minus" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Negation applies to the whole parenthesized sum.
    const parsed = try Parser.parse(alloc, "-(3 + 2)");
    const fake_ctx: *Node = @ptrFromInt(0x2000);
    var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
    const got = try evaluator.evalExpr(parsed, fake_ctx, 1, 1);
    try testing.expectEqual(@as(f64, -5), got.number);
}
|
||||
|
||||
test "Evaluator: division by zero produces infinity / NaN per IEEE" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();
    const fake_ctx: *Node = @ptrFromInt(0x2000);

    // Non-zero numerator: the quotient is positive infinity.
    {
        const tree = try Parser.parse(alloc, "1 div 0");
        var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
        const quotient = try evaluator.evalExpr(tree, fake_ctx, 1, 1);
        try testing.expect(std.math.isPositiveInf(quotient.number));
    }

    // Zero over zero: the quotient is NaN.
    {
        const tree = try Parser.parse(alloc, "0 div 0");
        var evaluator = Evaluator{ .arena = alloc, .frame = @ptrFromInt(0x1000) };
        const quotient = try evaluator.evalExpr(tree, fake_ctx, 1, 1);
        try testing.expect(std.math.isNan(quotient.number));
    }
}
|
||||
|
||||
test "Evaluator: searchAll on scalar expression returns empty (decision #3)" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    // The root and frame arguments below are made-up addresses. That is only
    // safe because these expressions are purely scalar (binop, literal,
    // true(), comparison) and never reach into the Frame or the context
    // node. A DOM-touching expression such as `id('x')` would dereference
    // them and crash, so do not add one to this list.
    const scalar_sources = .{ "1 + 2", "'hello'", "true()", "1 = 1" };
    inline for (scalar_sources) |source| {
        const found = try searchAll(alloc, @ptrFromInt(0x2000), source, @ptrFromInt(0x1000));
        try testing.expectEqual(@as(usize, 0), found.len);
    }
}
|
||||
957
src/browser/xpath/Parser.zig
Normal file
957
src/browser/xpath/Parser.zig
Normal file
@@ -0,0 +1,957 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 expression parser.
|
||||
//!
|
||||
//! Recursive descent over a fully-tokenized stream, producing an
|
||||
//! `ast.Expr` tree allocated on the caller's arena. The AST borrows
|
||||
//! string/name slices from `input` and is valid for as long as the
|
||||
//! arena and input outlive it.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Tokenizer = @import("Tokenizer.zig");
|
||||
const ast = @import("ast.zig");
|
||||
|
||||
const Token = Tokenizer.Token;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
const Parser = @This();
|
||||
|
||||
/// Every failure `parse` can report.
pub const Error = error{
    /// The arena could not satisfy an allocation.
    OutOfMemory,
    /// Expression nesting reached the `max_depth` cap.
    MaxDepthExceeded,
    /// A specific token was required but another was found, or input
    /// remained after a complete expression.
    UnexpectedToken,
    /// A location step had neither `*` nor a name where its node test belongs.
    ExpectedNodeTest,
    /// The current token cannot begin a primary expression.
    ExpectedPrimaryExpr,
};
|
||||
|
||||
/// Nesting limit for the recursive descent. Adversarial input such as
/// `(((((...)))))` or `------5` would otherwise be able to exhaust the
/// stack. Real-world XPath expressions stay far below this; browsers
/// typically allow several hundred levels.
const max_depth: u16 = 64;

/// Allocator backing every AST node and list the parser builds.
arena: Allocator,
/// The complete token stream; `parse` terminates it with an `.eof` token.
tokens: []const Token,
/// Index into `tokens` of the token the cursor rests on.
pos: usize = 0,
/// Current nesting level, checked against `max_depth`.
depth: u16 = 0,
|
||||
|
||||
/// Tokenizes all of `input`, then parses it as one XPath expression.
///
/// The returned tree and the token buffer are allocated from `arena`.
/// Fails with `error.UnexpectedToken` when tokens other than `.eof` remain
/// after a complete expression; any other `Error` comes from the grammar
/// routines or from allocation.
pub fn parse(arena: Allocator, input: []const u8) Error!*ast.Expr {
    // Reserve roughly one token slot per four input bytes (never fewer than
    // eight) up front, so typical expressions do not regrow the list.
    var tokens: std.ArrayList(Token) = .empty;
    try tokens.ensureTotalCapacity(arena, @max(8, input.len / 4));

    var lexer = Tokenizer{ .input = input };
    var tok = lexer.next();
    while (tok != .eof) : (tok = lexer.next()) {
        try tokens.append(arena, tok);
    }
    // Keep the `.eof` terminator in the stream; the cursor helpers rely on it.
    try tokens.append(arena, tok);

    var self = Parser{ .arena = arena, .tokens = tokens.items };
    const root = try self.parseExpr();
    if (self.peek() != .eof) return error.UnexpectedToken;
    return root;
}
|
||||
|
||||
// --- token cursor helpers ---
|
||||
|
||||
/// Returns the token under the cursor without consuming it.
fn peek(self: *const Parser) Token {
    const cursor = self.pos;
    return self.tokens[cursor];
}
|
||||
|
||||
/// Returns the token `offset` positions past the cursor without consuming
/// anything. Positions beyond the end of the stream read as `.eof`.
fn lookahead(self: *const Parser, offset: usize) Token {
    const target = self.pos + offset;
    return if (target < self.tokens.len) self.tokens[target] else .eof;
}
|
||||
|
||||
/// Consumes the token under the cursor and returns it.
fn advance(self: *Parser) Token {
    // The deferred increment runs after the return value has been read.
    defer self.pos += 1;
    return self.tokens[self.pos];
}
|
||||
|
||||
/// Reports whether the token under the cursor has the given tag.
fn at(self: *const Parser, tag: std.meta.Tag(Token)) bool {
    return std.meta.activeTag(self.peek()) == tag;
}
|
||||
|
||||
/// Consumes the current token if it has the given tag.
/// Returns whether a token was consumed.
fn match(self: *Parser, tag: std.meta.Tag(Token)) bool {
    if (!self.at(tag)) return false;
    _ = self.advance();
    return true;
}
|
||||
|
||||
/// Consumes and returns the current token, which must have the given tag.
/// Fails with `error.UnexpectedToken` otherwise, leaving the cursor in place.
fn expect(self: *Parser, tag: std.meta.Tag(Token)) Error!Token {
    if (self.at(tag)) return self.advance();
    return error.UnexpectedToken;
}
|
||||
|
||||
/// Consumes the current token if it is a `.name` token spelled exactly
/// like `keyword`. Returns whether a token was consumed.
fn matchKeyword(self: *Parser, keyword: []const u8) bool {
    switch (self.peek()) {
        .name => |spelling| {
            if (!std.mem.eql(u8, spelling, keyword)) return false;
            _ = self.advance();
            return true;
        },
        else => return false,
    }
}
|
||||
|
||||
/// Allocates an AST node on the arena and initializes it with `value`.
fn makeExpr(self: *Parser, value: ast.Expr) Error!*ast.Expr {
    const node = try self.arena.create(ast.Expr);
    node.* = value;
    return node;
}
|
||||
|
||||
/// Allocates a binary-operator node combining `left` and `right` under `op`.
fn makeBinop(self: *Parser, op: ast.BinOpKind, left: *ast.Expr, right: *ast.Expr) Error!*ast.Expr {
    const node: ast.Expr = .{ .binop = .{ .op = op, .left = left, .right = right } };
    return try self.makeExpr(node);
}
|
||||
|
||||
// --- operator-precedence chain ---
|
||||
//
|
||||
// Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path
|
||||
|
||||
/// Entry point of the precedence chain. Each call counts as one nesting
/// level; fails with `error.MaxDepthExceeded` once `max_depth` levels are
/// already active.
fn parseExpr(self: *Parser) Error!*ast.Expr {
    const entered = self.depth + 1;
    if (entered > max_depth) return error.MaxDepthExceeded;
    self.depth = entered;
    // Leave the level again on every exit path, including errors.
    defer self.depth -= 1;
    return self.parseOrExpr();
}
|
||||
|
||||
/// Parses a left-associative chain of `or` over and-expressions.
fn parseOrExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseAndExpr();
    while (true) {
        if (!self.matchKeyword("or")) return acc;
        const rhs = try self.parseAndExpr();
        acc = try self.makeBinop(.or_, acc, rhs);
    }
}
|
||||
|
||||
/// Parses a left-associative chain of `and` over equality expressions.
fn parseAndExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseEqualityExpr();
    while (self.matchKeyword("and")) {
        acc = try self.makeBinop(.and_, acc, try self.parseEqualityExpr());
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of `=` / `!=` over relational expressions.
fn parseEqualityExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseRelationalExpr();
    while (true) {
        const op = equalityOp(self.peek()) orelse break;
        _ = self.advance(); // the operator token
        const rhs = try self.parseRelationalExpr();
        acc = try self.makeBinop(op, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of `<`, `>`, `<=`, `>=` over additive
/// expressions.
fn parseRelationalExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseAdditiveExpr();
    while (true) {
        const op = relationalOp(self.peek()) orelse return acc;
        _ = self.advance(); // the operator token
        acc = try self.makeBinop(op, acc, try self.parseAdditiveExpr());
    }
}
|
||||
|
||||
/// Parses a left-associative chain of `+` / `-` over multiplicative
/// expressions.
fn parseAdditiveExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseMultExpr();
    while (true) {
        const op = additiveOp(self.peek()) orelse break;
        _ = self.advance(); // the operator token
        const rhs = try self.parseMultExpr();
        acc = try self.makeBinop(op, acc, rhs);
    }
    return acc;
}
|
||||
|
||||
/// Parses a left-associative chain of `*`, `div`, `mod` over unary
/// expressions.
///
/// Once a full unary expression has been read, a following `*` means
/// multiplication. `div` and `mod` arrive from the tokenizer as plain Name
/// tokens and only act as operators in this position.
fn parseMultExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parseUnaryExpr();
    while (true) {
        const op = multOp(self.peek()) orelse return acc;
        _ = self.advance(); // the operator token
        const rhs = try self.parseUnaryExpr();
        acc = try self.makeBinop(op, acc, rhs);
    }
}
|
||||
|
||||
/// Parses zero or more leading `-` signs followed by a union expression.
/// Every `-` adds one nesting level, so a long run of minus signs fails
/// with `error.MaxDepthExceeded` instead of recursing without bound.
fn parseUnaryExpr(self: *Parser) Error!*ast.Expr {
    if (!self.match(.minus)) return self.parseUnionExpr();

    if (self.depth >= max_depth) return error.MaxDepthExceeded;
    self.depth += 1;
    defer self.depth -= 1;

    const negated = try self.parseUnaryExpr();
    return try self.makeExpr(.{ .neg = negated });
}
|
||||
|
||||
/// Parses a left-associative chain of `|` over path expressions.
fn parseUnionExpr(self: *Parser) Error!*ast.Expr {
    var acc = try self.parsePathExpr();
    while (true) {
        if (!self.match(.pipe)) return acc;
        const rhs = try self.parsePathExpr();
        acc = try self.makeBinop(.union_, acc, rhs);
    }
}
|
||||
|
||||
// --- path expressions ---
|
||||
|
||||
/// Parses a path expression: an absolute path, a relative path, or a
/// primary expression with optional predicates and an optional `/` or `//`
/// location-path tail.
fn parsePathExpr(self: *Parser) Error!*ast.Expr {
    const first = self.peek();
    switch (first) {
        .slash, .double_slash => return self.parseAbsPath(),
        else => {},
    }

    // Telling a filter expression from a relative path: a primary expression
    // begins with `(`, a string, a number, `$`, or `name(` where the name is
    // not one of the node-type tests
    // (`node`/`text`/`comment`/`processing-instruction`).
    const starts_primary = switch (first) {
        .lparen, .string, .number, .dollar => true,
        .name => |ident| !isNodeTypeName(ident) and self.lookahead(1) == .lparen,
        else => false,
    };
    if (!starts_primary) return self.parseRelPath();

    // Each predicate wraps what came before it, so `e[1][2]` nests.
    var base = try self.parsePrimaryExpr();
    while (self.match(.lbracket)) {
        const predicate = try self.parseExpr();
        _ = try self.expect(.rbracket);
        base = try self.makeExpr(.{ .filter = .{ .expr = base, .predicate = predicate } });
    }

    const separator = self.peek();
    if (separator != .slash and separator != .double_slash) return base;
    _ = self.advance();

    // `//` puts a descendant-or-self::node() step in front of the tail.
    var tail: std.ArrayList(ast.Step) = .empty;
    if (separator == .double_slash) try tail.append(self.arena, descendantOrSelfStep());
    try self.parseRelStepsInto(&tail);
    return try self.makeExpr(.{ .filter_path = .{
        .filter = base,
        .steps = tail.items,
    } });
}
|
||||
|
||||
/// Parses a path that begins with `/` or `//` and returns it as an
/// absolute `.path` node.
fn parseAbsPath(self: *Parser) Error!*ast.Expr {
    var steps: std.ArrayList(ast.Step) = .empty;
    if (!self.match(.double_slash)) {
        _ = try self.expect(.slash);
        // A lone `/` selects the document root, so steps are optional here.
        if (self.canStartStep()) try self.parseRelStepsInto(&steps);
    } else {
        // `//` always needs at least one step after the implicit one.
        try steps.append(self.arena, descendantOrSelfStep());
        try self.parseRelStepsInto(&steps);
    }
    const node: ast.Expr = .{ .path = .{ .absolute = true, .steps = steps.items } };
    return try self.makeExpr(node);
}
|
||||
|
||||
/// Parses one or more steps and returns them as a non-absolute `.path` node.
fn parseRelPath(self: *Parser) Error!*ast.Expr {
    var collected: std.ArrayList(ast.Step) = .empty;
    try self.parseRelStepsInto(&collected);
    const node: ast.Expr = .{ .path = .{ .absolute = false, .steps = collected.items } };
    return try self.makeExpr(node);
}
|
||||
|
||||
/// Appends a `/`- or `//`-separated run of steps to `steps`. At least one
/// step is required. Each `//` separator contributes an extra
/// descendant-or-self::node() step ahead of the step that follows it.
fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(ast.Step)) Error!void {
    try steps.append(self.arena, try self.parseStep());
    while (true) {
        switch (self.peek()) {
            .slash => {
                _ = self.advance();
            },
            .double_slash => {
                _ = self.advance();
                try steps.append(self.arena, descendantOrSelfStep());
            },
            else => return,
        }
        try steps.append(self.arena, try self.parseStep());
    }
}
|
||||
|
||||
/// Reports whether the current token can open a location step:
/// a name, `*`, `.`, `..` or `@`.
fn canStartStep(self: *const Parser) bool {
    switch (self.peek()) {
        .name, .star, .dot, .double_dot, .at => return true,
        else => return false,
    }
}
|
||||
|
||||
/// Parses one location step: an optional axis (`@` or `name::`), a node
/// test, and any number of `[...]` predicates.
fn parseStep(self: *Parser) Error!ast.Step {
    // `.` and `..` are complete steps on their own: no axis, node test or
    // predicates are read for them (a predicate after `.` is a parse error,
    // matching the polyfill).
    if (self.match(.dot)) return abbreviatedStep(.self);
    if (self.match(.double_dot)) return abbreviatedStep(.parent);

    const axis: ast.Axis = blk: {
        if (self.match(.at)) break :blk .attribute;
        // Without an explicit `name::` prefix the axis defaults to child.
        if (self.peek() != .name or self.lookahead(1) != .double_colon) break :blk .child;
        const spelled = self.advance().name;
        _ = self.advance(); // `::`
        break :blk parseAxisName(spelled);
    };

    const node_test = try self.parseNodeTest();

    var predicates: std.ArrayList(*ast.Expr) = .empty;
    while (self.match(.lbracket)) {
        const predicate = try self.parseExpr();
        _ = try self.expect(.rbracket);
        try predicates.append(self.arena, predicate);
    }

    return .{ .axis = axis, .node_test = node_test, .predicates = predicates.items };
}
|
||||
|
||||
/// Parses a node test: `*`, a name, or a node-type test such as `text()`.
/// Fails with `error.ExpectedNodeTest` when the current token is neither
/// `*` nor a name.
fn parseNodeTest(self: *Parser) Error!ast.NodeTest {
    if (self.match(.star)) return .{ .name = "*" };

    const ident = switch (self.peek()) {
        .name => |text| text,
        else => return error.ExpectedNodeTest,
    };

    // A node-type name only acts as a type test when `(` follows it;
    // otherwise it is an ordinary name test.
    const maybe_kind: ?ast.TypeTest = if (self.lookahead(1) == .lparen) typeTestKind(ident) else null;
    _ = self.advance(); // the name
    const kind = maybe_kind orelse return .{ .name = ident };

    _ = self.advance(); // `(`
    // `processing-instruction("target")` consumes the literal but ignores it (decision #3 stub).
    if (kind == .processing_instruction and self.peek() == .string) {
        _ = self.advance();
    }
    _ = try self.expect(.rparen);
    return .{ .type_test = kind };
}
|
||||
|
||||
/// Parses a primary expression: a string or number literal, a `$variable`
/// reference, a parenthesized expression, or a function call. Fails with
/// `error.ExpectedPrimaryExpr` for any other leading token.
fn parsePrimaryExpr(self: *Parser) Error!*ast.Expr {
    switch (self.peek()) {
        .string => |text| {
            _ = self.advance();
            return try self.makeExpr(.{ .literal = text });
        },
        .number => |value| {
            _ = self.advance();
            return try self.makeExpr(.{ .number = value });
        },
        .dollar => {
            _ = self.advance();
            // The stored variable name does not include the `$`.
            const ident = try self.expect(.name);
            return try self.makeExpr(.{ .var_ref = ident.name });
        },
        .lparen => {
            _ = self.advance();
            // Parentheses only group; the inner node is returned unwrapped.
            const inner = try self.parseExpr();
            _ = try self.expect(.rparen);
            return inner;
        },
        .name => |callee| {
            _ = self.advance();
            _ = try self.expect(.lparen);
            var arguments: std.ArrayList(*ast.Expr) = .empty;
            // An immediate `)` means an empty argument list.
            if (!self.match(.rparen)) {
                while (true) {
                    try arguments.append(self.arena, try self.parseExpr());
                    if (!self.match(.comma)) break;
                }
                _ = try self.expect(.rparen);
            }
            return try self.makeExpr(.{ .fn_call = .{ .name = callee, .args = arguments.items } });
        },
        else => return error.ExpectedPrimaryExpr,
    }
}
|
||||
|
||||
// --- pure helpers ---
|
||||
|
||||
/// Maps an `=` or `!=` token to its operator kind; null for anything else.
fn equalityOp(t: Token) ?ast.BinOpKind {
    if (t == .eq) return .eq;
    if (t == .neq) return .neq;
    return null;
}
|
||||
|
||||
/// Maps a `<`, `>`, `<=` or `>=` token to its operator kind; null for
/// anything else.
fn relationalOp(t: Token) ?ast.BinOpKind {
    switch (t) {
        .lt => return .lt,
        .lte => return .lte,
        .gt => return .gt,
        .gte => return .gte,
        else => return null,
    }
}
|
||||
|
||||
/// Maps a `+` or `-` token to its operator kind; null for anything else.
fn additiveOp(t: Token) ?ast.BinOpKind {
    if (t == .plus) return .add;
    if (t == .minus) return .sub;
    return null;
}
|
||||
|
||||
/// Maps `*` and the Name tokens `div` / `mod` to their operator kind;
/// null for anything else, including every other name.
fn multOp(t: Token) ?ast.BinOpKind {
    switch (t) {
        .star => return .mul,
        .name => |word| {
            if (std.mem.eql(u8, word, "div")) return .div;
            if (std.mem.eql(u8, word, "mod")) return .mod;
            return null;
        },
        else => return null,
    }
}
|
||||
|
||||
/// Builds the step that `//` stands for: descendant-or-self::node() with
/// no predicates.
fn descendantOrSelfStep() ast.Step {
    const step: ast.Step = .{
        .axis = .descendant_or_self,
        .node_test = .{ .type_test = .node },
        .predicates = &.{},
    };
    return step;
}
|
||||
|
||||
/// Builds a predicate-free `axis::node()` step, used for `.` and `..`.
fn abbreviatedStep(axis: ast.Axis) ast.Step {
    const step: ast.Step = .{
        .axis = axis,
        .node_test = .{ .type_test = .node },
        .predicates = &.{},
    };
    return step;
}
|
||||
|
||||
/// Reports whether `name` is one of the node-type test names listed in
/// `type_test_lookup`.
fn isNodeTypeName(name: []const u8) bool {
    return type_test_lookup.has(name);
}
|
||||
|
||||
/// Node-type test names and the `ast.TypeTest` each one denotes.
const type_test_lookup = std.StaticStringMap(ast.TypeTest).initComptime(.{
    .{ "comment", .comment },
    .{ "node", .node },
    .{ "processing-instruction", .processing_instruction },
    .{ "text", .text },
});
|
||||
|
||||
/// Looks `name` up among the node-type test names; null when it is not one.
fn typeTestKind(name: []const u8) ?ast.TypeTest {
    const hit = type_test_lookup.get(name);
    return hit;
}
|
||||
|
||||
/// Axis names as written before `::`, and the `ast.Axis` each one denotes.
const axis_lookup = std.StaticStringMap(ast.Axis).initComptime(.{
    // Downward axes.
    .{ "child", .child },
    .{ "descendant", .descendant },
    .{ "descendant-or-self", .descendant_or_self },
    // The node itself and upward axes.
    .{ "self", .self },
    .{ "parent", .parent },
    .{ "ancestor", .ancestor },
    .{ "ancestor-or-self", .ancestor_or_self },
    // Sibling and document-order axes.
    .{ "following-sibling", .following_sibling },
    .{ "preceding-sibling", .preceding_sibling },
    .{ "following", .following },
    .{ "preceding", .preceding },
    // Non-element axes.
    .{ "attribute", .attribute },
    .{ "namespace", .namespace },
});
|
||||
|
||||
/// Translates an axis name into its `ast.Axis`. Names missing from
/// `axis_lookup` become `.unknown` rather than a parse error.
fn parseAxisName(name: []const u8) ast.Axis {
    if (axis_lookup.get(name)) |axis| return axis;
    return .unknown;
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
/// Test helper: parses `input` on a fresh arena and returns the arena
/// together with the resulting tree. The caller must deinit the returned
/// arena. If parsing fails, the arena is released before the error is
/// returned.
fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *ast.Expr } {
    var owned = std.heap.ArenaAllocator.init(testing.allocator);
    const root = parse(owned.allocator(), input) catch |err| {
        owned.deinit();
        return err;
    };
    return .{ .arena = owned, .expr = root };
}
|
||||
|
||||
test "XPath.Parser: number literal" {
    // A bare number parses straight to a `.number` node.
    var parsed = try parseFixture("42");
    defer parsed.arena.deinit();
    const root = parsed.expr;
    try testing.expectEqual(@as(f64, 42), root.number);
}
|
||||
|
||||
test "XPath.Parser: string literal" {
    // The stored literal does not include the surrounding quotes.
    var parsed = try parseFixture("'hello'");
    defer parsed.arena.deinit();
    const root = parsed.expr;
    try testing.expectEqualStrings("hello", root.literal);
}
|
||||
|
||||
test "XPath.Parser: variable reference strips $" {
    // Only the name after `$` is kept in the `.var_ref` node.
    var parsed = try parseFixture("$x");
    defer parsed.arena.deinit();
    const root = parsed.expr;
    try testing.expectEqualStrings("x", root.var_ref);
}
|
||||
|
||||
test "XPath.Parser: parenthesized expression unwraps" {
    // Parentheses leave no wrapper node: the root is the number itself.
    var parsed = try parseFixture("(42)");
    defer parsed.arena.deinit();
    const root = parsed.expr;
    try testing.expectEqual(@as(f64, 42), root.number);
}
|
||||
|
||||
test "XPath.Parser: function call with no args" {
    var parsed = try parseFixture("position()");
    defer parsed.arena.deinit();
    const call = parsed.expr.fn_call;
    try testing.expectEqualStrings("position", call.name);
    // `()` yields an empty argument list.
    try testing.expectEqual(@as(usize, 0), call.args.len);
}
|
||||
|
||||
test "XPath.Parser: function call with args" {
    var parsed = try parseFixture("substring('abc', 2, 1)");
    defer parsed.arena.deinit();
    const call = parsed.expr.fn_call;
    try testing.expectEqualStrings("substring", call.name);
    // Arguments are kept in source order.
    const args = call.args;
    try testing.expectEqual(@as(usize, 3), args.len);
    try testing.expectEqualStrings("abc", args[0].literal);
    try testing.expectEqual(@as(f64, 2), args[1].number);
    try testing.expectEqual(@as(f64, 1), args[2].number);
}
|
||||
|
||||
test "XPath.Parser: arithmetic precedence — mul binds tighter than add" {
    var parsed = try parseFixture("1 + 2 * 3");
    defer parsed.arena.deinit();
    // The tree must be add(1, mul(2, 3)): the sum is the root and the
    // product is its right operand.
    const sum = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.add, sum.op);
    try testing.expectEqual(@as(f64, 1), sum.left.number);
    const product = sum.right.binop;
    try testing.expectEqual(ast.BinOpKind.mul, product.op);
    try testing.expectEqual(@as(f64, 2), product.left.number);
    try testing.expectEqual(@as(f64, 3), product.right.number);
}
|
||||
|
||||
test "XPath.Parser: arithmetic left-associativity" {
    var parsed = try parseFixture("1 - 2 - 3");
    defer parsed.arena.deinit();
    // The tree must be sub(sub(1, 2), 3): the first subtraction is nested
    // on the left of the second.
    const outer = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.sub, outer.op);
    try testing.expectEqual(@as(f64, 3), outer.right.number);
    const nested = outer.left.binop;
    try testing.expectEqual(ast.BinOpKind.sub, nested.op);
    try testing.expectEqual(@as(f64, 1), nested.left.number);
    try testing.expectEqual(@as(f64, 2), nested.right.number);
}
|
||||
|
||||
test "XPath.Parser: div and mod are operator-position keywords" {
    // Between two operands the names `div` and `mod` parse as operators.
    const cases = .{
        .{ "7 div 2", ast.BinOpKind.div },
        .{ "7 mod 2", ast.BinOpKind.mod },
    };
    inline for (cases) |case| {
        var parsed = try parseFixture(case[0]);
        defer parsed.arena.deinit();
        try testing.expectEqual(case[1], parsed.expr.binop.op);
    }
}
|
||||
|
||||
test "XPath.Parser: comparison operators" {
    // Each source text paired with the operator expected at the tree root.
    const cases = .{
        .{ "1 = 2", ast.BinOpKind.eq },
        .{ "1 != 2", ast.BinOpKind.neq },
        .{ "1 < 2", ast.BinOpKind.lt },
        .{ "1 <= 2", ast.BinOpKind.lte },
        .{ "1 > 2", ast.BinOpKind.gt },
        .{ "1 >= 2", ast.BinOpKind.gte },
    };
    inline for (cases) |case| {
        var parsed = try parseFixture(case[0]);
        defer parsed.arena.deinit();
        const root = parsed.expr.binop;
        try testing.expectEqual(case[1], root.op);
    }
}
|
||||
|
||||
test "XPath.Parser: logical or/and short-circuit chain" {
    var parsed = try parseFixture("a or b and c");
    defer parsed.arena.deinit();
    // `and` binds tighter than `or`, so the tree must be
    // or(path(a), and(path(b), path(c))).
    const root = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.or_, root.op);
    const rhs = root.right.binop;
    try testing.expectEqual(ast.BinOpKind.and_, rhs.op);
}
|
||||
|
||||
test "XPath.Parser: unary minus" {
    // `-1` is a `.neg` node wrapping the positive literal, not a negative number.
    var parsed = try parseFixture("-1");
    defer parsed.arena.deinit();
    const operand = parsed.expr.neg;
    try testing.expectEqual(@as(f64, 1), operand.number);
}
|
||||
|
||||
test "XPath.Parser: union" {
    // `|` between two paths produces a union binop at the root.
    var parsed = try parseFixture("a | b");
    defer parsed.arena.deinit();
    const root = parsed.expr.binop;
    try testing.expectEqual(ast.BinOpKind.union_, root.op);
}
|
||||
|
||||
test "XPath.Parser: absolute path / alone is document root" {
    var parsed = try parseFixture("/");
    defer parsed.arena.deinit();
    const root_path = parsed.expr.path;
    // A lone slash is an absolute path with no steps at all.
    try testing.expect(root_path.absolute);
    try testing.expectEqual(@as(usize, 0), root_path.steps.len);
}
|
||||
|
||||
test "XPath.Parser: absolute path /foo" {
    var parsed = try parseFixture("/foo");
    defer parsed.arena.deinit();
    const abs = parsed.expr.path;
    try testing.expect(abs.absolute);
    // Exactly one step, naming `foo`.
    try testing.expectEqual(@as(usize, 1), abs.steps.len);
    const only = abs.steps[0];
    try testing.expectEqualStrings("foo", only.node_test.name);
}
|
||||
|
||||
test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" {
    var parsed = try parseFixture("//foo");
    defer parsed.arena.deinit();
    const abs = parsed.expr.path;
    try testing.expect(abs.absolute);
    // The `//` contributes an implicit first step ahead of `foo`.
    try testing.expectEqual(@as(usize, 2), abs.steps.len);
    const implicit = abs.steps[0];
    try testing.expectEqual(ast.Axis.descendant_or_self, implicit.axis);
    try testing.expectEqual(ast.TypeTest.node, implicit.node_test.type_test);
    try testing.expectEqualStrings("foo", abs.steps[1].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: relative path child::foo/bar" {
    var parsed = try parseFixture("foo/bar");
    defer parsed.arena.deinit();
    const rel = parsed.expr.path;
    try testing.expect(!rel.absolute);
    try testing.expectEqual(@as(usize, 2), rel.steps.len);
    // With no explicit axis, the step is on the child axis.
    const first = rel.steps[0];
    try testing.expectEqual(ast.Axis.child, first.axis);
    try testing.expectEqualStrings("foo", first.node_test.name);
    try testing.expectEqualStrings("bar", rel.steps[1].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: abbreviated steps . and .." {
    var parsed = try parseFixture("./..");
    defer parsed.arena.deinit();
    const steps = parsed.expr.path.steps;
    try testing.expectEqual(@as(usize, 2), steps.len);
    // `.` is the self axis, `..` the parent axis.
    try testing.expectEqual(ast.Axis.self, steps[0].axis);
    try testing.expectEqual(ast.Axis.parent, steps[1].axis);
}
|
||||
|
||||
test "XPath.Parser: attribute axis @class" {
    var parsed = try parseFixture("@class");
    defer parsed.arena.deinit();
    // `@` selects the attribute axis; the name after it is the node test.
    const attr_step = parsed.expr.path.steps[0];
    try testing.expectEqual(ast.Axis.attribute, attr_step.axis);
    try testing.expectEqualStrings("class", attr_step.node_test.name);
}
|
||||
|
||||
test "XPath.Parser: all 13 named axes parse correctly" {
    // XPath 1.0 defines thirteen axes. Each is written here in its explicit
    // `axis::` form, including `attribute::`, which is otherwise only
    // reached through the `@` abbreviation.
    const cases = .{
        .{ "child::a", ast.Axis.child },
        .{ "descendant::a", ast.Axis.descendant },
        .{ "descendant-or-self::a", ast.Axis.descendant_or_self },
        .{ "self::a", ast.Axis.self },
        .{ "parent::a", ast.Axis.parent },
        .{ "ancestor::a", ast.Axis.ancestor },
        .{ "ancestor-or-self::a", ast.Axis.ancestor_or_self },
        .{ "following-sibling::a", ast.Axis.following_sibling },
        .{ "preceding-sibling::a", ast.Axis.preceding_sibling },
        .{ "following::a", ast.Axis.following },
        .{ "preceding::a", ast.Axis.preceding },
        .{ "attribute::a", ast.Axis.attribute },
        .{ "namespace::a", ast.Axis.namespace },
    };
    inline for (cases) |case| {
        var parsed = try parseFixture(case[0]);
        defer parsed.arena.deinit();
        try testing.expectEqual(case[1], parsed.expr.path.steps[0].axis);
    }
}
|
||||
|
||||
test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" {
    // An unrecognized axis name still parses; it becomes `.unknown`.
    var parsed = try parseFixture("wibble::a");
    defer parsed.arena.deinit();
    const step = parsed.expr.path.steps[0];
    try testing.expectEqual(ast.Axis.unknown, step.axis);
}
|
||||
|
||||
test "XPath.Parser: wildcard *" {
    // The wildcard is stored as a name test spelled "*".
    var parsed = try parseFixture("*");
    defer parsed.arena.deinit();
    const step = parsed.expr.path.steps[0];
    try testing.expectEqualStrings("*", step.node_test.name);
}
|
||||
|
||||
test "XPath.Parser: namespace-prefixed name and wildcard" {
    // Prefixed names and prefixed wildcards are kept as one name, prefix
    // and colon included.
    const sources = .{ "svg:rect", "svg:*" };
    inline for (sources) |source| {
        var parsed = try parseFixture(source);
        defer parsed.arena.deinit();
        try testing.expectEqualStrings(source, parsed.expr.path.steps[0].node_test.name);
    }
}
|
||||
|
||||
test "XPath.Parser: node-type tests" {
    // Each node-type test with empty parentheses and the kind it must yield.
    const cases = .{
        .{ "node()", ast.TypeTest.node },
        .{ "text()", ast.TypeTest.text },
        .{ "comment()", ast.TypeTest.comment },
        .{ "processing-instruction()", ast.TypeTest.processing_instruction },
    };
    inline for (cases) |case| {
        var parsed = try parseFixture(case[0]);
        defer parsed.arena.deinit();
        const step = parsed.expr.path.steps[0];
        try testing.expectEqual(case[1], step.node_test.type_test);
    }
}
|
||||
|
||||
test "XPath.Parser: processing-instruction with literal target — consumed but ignored" {
    // The target literal is accepted but leaves no trace in the node test.
    var parsed = try parseFixture("processing-instruction('xml-stylesheet')");
    defer parsed.arena.deinit();
    const step = parsed.expr.path.steps[0];
    try testing.expectEqual(ast.TypeTest.processing_instruction, step.node_test.type_test);
}
|
||||
|
||||
test "XPath.Parser: predicate on step" {
    var parsed = try parseFixture("p[1]");
    defer parsed.arena.deinit();
    // The bracketed expression is attached to the step itself.
    const predicates = parsed.expr.path.steps[0].predicates;
    try testing.expectEqual(@as(usize, 1), predicates.len);
    try testing.expectEqual(@as(f64, 1), predicates[0].number);
}
|
||||
|
||||
test "XPath.Parser: multi-predicate step" {
    // Consecutive predicates accumulate on the same step.
    var parsed = try parseFixture("p[1][@x]");
    defer parsed.arena.deinit();
    const predicates = parsed.expr.path.steps[0].predicates;
    try testing.expectEqual(@as(usize, 2), predicates.len);
}
|
||||
|
||||
test "XPath.Parser: filter expression with predicate parses as Filter, not Step" {
    var parsed = try parseFixture("(//a)[1]");
    defer parsed.arena.deinit();
    // The root is a filter node: the predicate applies to the whole
    // parenthesized path, not to its last step.
    const filter = parsed.expr.filter;
    try testing.expectEqual(@as(f64, 1), filter.predicate.number);
    try testing.expect(filter.expr.path.absolute);
}
|
||||
|
||||
test "XPath.Parser: filter with multi-predicate nests" {
    var parsed = try parseFixture("(//a)[1][2]");
    defer parsed.arena.deinit();
    // The last predicate is outermost; the first one sits inside it.
    const last = parsed.expr.filter;
    try testing.expectEqual(@as(f64, 2), last.predicate.number);
    const first = last.expr.filter;
    try testing.expectEqual(@as(f64, 1), first.predicate.number);
}
|
||||
|
||||
test "XPath.Parser: filter with location-path tail (filter_path)" {
    var parsed = try parseFixture("(//a)/b");
    defer parsed.arena.deinit();
    // The parenthesized path is the filter; `/b` becomes the trailing steps.
    const tail = parsed.expr.filter_path;
    try testing.expect(tail.filter.path.absolute);
    try testing.expectEqual(@as(usize, 1), tail.steps.len);
    try testing.expectEqualStrings("b", tail.steps[0].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: filter with // tail prepends descendant-or-self" {
    var parsed = try parseFixture("(//a)//b");
    defer parsed.arena.deinit();
    // `//` after the filter adds an implicit step ahead of `b`.
    const steps = parsed.expr.filter_path.steps;
    try testing.expectEqual(@as(usize, 2), steps.len);
    try testing.expectEqual(ast.Axis.descendant_or_self, steps[0].axis);
    try testing.expectEqualStrings("b", steps[1].node_test.name);
}
|
||||
|
||||
test "XPath.Parser: function call followed by predicate" {
    var parsed = try parseFixture("id('x')[1]");
    defer parsed.arena.deinit();
    // The predicate wraps the call in a filter node.
    const filter = parsed.expr.filter;
    try testing.expectEqual(@as(f64, 1), filter.predicate.number);
    try testing.expectEqualStrings("id", filter.expr.fn_call.name);
}
|
||||
|
||||
test "XPath.Parser: complex representative expression" {
    var parsed = try parseFixture("//div[@class='active']/p[position()<=last()-1]");
    defer parsed.arena.deinit();
    const abs = parsed.expr.path;
    try testing.expect(abs.absolute);
    // Three steps: the implicit one from `//`, then `div`, then `p`.
    try testing.expectEqual(@as(usize, 3), abs.steps.len);
    try testing.expectEqual(ast.Axis.descendant_or_self, abs.steps[0].axis);
    const div_step = abs.steps[1];
    try testing.expectEqualStrings("div", div_step.node_test.name);
    try testing.expectEqual(@as(usize, 1), div_step.predicates.len);
    const p_step = abs.steps[2];
    try testing.expectEqualStrings("p", p_step.node_test.name);
    try testing.expectEqual(@as(usize, 1), p_step.predicates.len);
}
|
||||
|
||||
/// Test helper: asserts that parsing `input` fails with exactly `expected`.
fn expectParseError(input: []const u8, expected: anyerror) !void {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();
    const outcome = parse(scratch.allocator(), input);
    try testing.expectError(expected, outcome);
}
|
||||
|
||||
test "XPath.Parser: error on unbalanced paren" {
    // The `)` that must close the group is missing.
    const source = "(1";
    try expectParseError(source, error.UnexpectedToken);
}
|
||||
|
||||
test "XPath.Parser: error on unbalanced bracket" {
    // The `]` that must close the predicate is missing.
    const source = "p[1";
    try expectParseError(source, error.UnexpectedToken);
}
|
||||
|
||||
test "XPath.Parser: error on missing node test" {
    // An axis prefix with nothing after `::` has no node test.
    const source = "child::";
    try expectParseError(source, error.ExpectedNodeTest);
}
|
||||
|
||||
test "XPath.Parser: bare `+` falls through to step and reports missing node test" {
    // Same outcome as the polyfill: `+` cannot start a path or a primary
    // expression, so parsing lands in parseStep, which finds no name to use
    // as the node test.
    const source = "+";
    try expectParseError(source, error.ExpectedNodeTest);
}
|
||||
|
||||
// Leftover tokens after a complete expression are an error.
test "XPath.Parser: error on trailing tokens" {
    const input = "1 2";
    try expectParseError(input, error.UnexpectedToken);
}
|
||||
|
||||
// Empty input reports the same error as a step with no node test.
test "XPath.Parser: empty string falls through to step and reports missing node test" {
    const input = "";
    try expectParseError(input, error.ExpectedNodeTest);
}
|
||||
|
||||
test "XPath.Parser: 91-case battery — every expression parses" {
    // Conformance battery of 91 XPath 1.0 expressions spanning the
    // expression shapes accepted by the public API. The only
    // requirement here is that each one parses without error.
    const expressions = [_][]const u8{
        "/html",
        "/html/body",
        "/",
        "//h1",
        "//ul/li",
        "//ul//li",
        ".",
        ".//li",
        "//section/*",
        "//*[@id='heading']",
        "//li[1]/following-sibling::li",
        "//li[5]/preceding-sibling::li",
        "//li/parent::ul",
        "//li/ancestor::body",
        "//li/ancestor-or-self::body",
        "//li[3]/preceding::li",
        "//li[1]/following::li",
        "//ul/descendant::li",
        "//ul/descendant-or-self::li",
        "//section[1]/child::span",
        "//*[@id='heading']/self::h1",
        "//a[1]/attribute::href",
        "//a[1]/@*",
        "//li[1]",
        "//li[last()]",
        "//li[last() - 1]",
        "//li[position() = 1]",
        "//li[position() > 2]",
        "//li[position() mod 2 = 1]",
        "(//li)[1]",
        "(//section)[2]",
        "//li[3]/preceding-sibling::li[1]",
        "//li[5]/ancestor::*[1]",
        "//li[contains(concat(' ', @class, ' '), ' even ')][2]",
        "//*[@id='heading' and @class='primary']",
        "//*[@id='heading' or @id='p1']",
        "//section[a]",
        "//section[count(span) = 2]",
        "//ul[count(li) = 5]",
        "//tr[td[1]]",
        "//tr[td/text() = 'Bob']",
        "//*[starts-with(@id, 'link')]",
        "//*[normalize-space() = 'Hello World']",
        "//*[normalize-space(.) = 'Item 1']",
        "//*[concat(@id, '-x') = 'heading-x']",
        "//*[substring(@id, 1, 1) = 'p']",
        "//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]",
        "//p[translate(@id, 'p', 'q') = 'q1']",
        "//*[substring-before(@id, '1') = 'p']",
        "//*[substring-after(@id, 'lin') = 'k1']",
        "//tr[number(td[2]) > 28]",
        "//tr[floor(number(td[2]) div 10) = 3]",
        "//tr[ceiling(number(td[2]) div 10) = 3]",
        "//tr[round(number(td[2]) div 10) = 3]",
        "//ul[sum(li/@data-len) = 0]",
        "//p[boolean(@lang)]",
        "//*[false()]",
        "//*[name() = 'h1']",
        "//*[local-name() = 'h1']",
        "id('heading')",
        "id('heading p1')",
        "id(//em/parent::p/@id)",
        "//h1 | //title",
        "//h1 | //*[@id='p1']",
        "//*[@id='heading'] | //*[@id='heading']",
        "//li[position() + 1 = 3]",
        "//li[position() - 1 = 0]",
        "//li[position() * 2 = 4]",
        "//li[position() div 2 = 1]",
        "//li[(position() mod 2) = 0]",
        "//tr[number(td[2]) = 30]",
        "//tr[number(td[2]) != 30]",
        "//tr[number(td[2]) < 30]",
        "//tr[number(td[2]) <= 30]",
        "//tr[number(td[2]) > 30]",
        "//tr[number(td[2]) >= 30]",
        "//tr[td[2] = 30]",
        "//tr[td[2] = '30']",
        "//comment()",
        ".//a[contains(normalize-space(string(.)), 'Click me')]",
        ".//input[(./@type = 'text')]",
        ".//*[@id='heading']",
        ".//li[contains(concat(' ', @class, ' '), ' even ')]",
        "//*[@id='heading']/text()",
        "//em/parent::p",
        "//p[em]",
        "//p[not(em)]",
        "//section[a/@href = '/foo']",
        "//ul/li[last()][position() = last()]",
        "//ul[string(count(li)) = '5']",
        "//body[count(//*[contains(@class, 'item')]) = 5]",
    };
    // Guards against entries being added or dropped without updating
    // the test name.
    try testing.expectEqual(@as(usize, 91), expressions.len);

    for (expressions) |xpath| {
        // Fresh arena per expression so one parse cannot affect the next.
        var scratch = std.heap.ArenaAllocator.init(testing.allocator);
        defer scratch.deinit();

        if (parse(scratch.allocator(), xpath)) |_| {
            // Parsed fine; nothing further to check.
        } else |err| {
            // Name the offending expression before propagating the failure.
            std.debug.print("\n failed to parse: {s}\n error: {s}\n", .{ xpath, @errorName(err) });
            return err;
        }
    }
}
|
||||
|
||||
// Builds `max_depth + 1` nested parentheses around `1` and expects the
// parser to refuse it with MaxDepthExceeded.
test "XPath.Parser: deep parenthesization rejected past max_depth" {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();

    const nesting = max_depth + 1;
    var source: std.ArrayList(u8) = .empty;
    defer source.deinit(testing.allocator);

    try source.appendNTimes(testing.allocator, '(', nesting);
    try source.append(testing.allocator, '1');
    try source.appendNTimes(testing.allocator, ')', nesting);

    const outcome = parse(scratch.allocator(), source.items);
    try testing.expectError(error.MaxDepthExceeded, outcome);
}
|
||||
|
||||
// Builds `max_depth + 1` stacked unary minus signs in front of `1` and
// expects the parser to refuse it with MaxDepthExceeded.
test "XPath.Parser: deep unary minus rejected past max_depth" {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();

    var source: std.ArrayList(u8) = .empty;
    defer source.deinit(testing.allocator);

    try source.appendNTimes(testing.allocator, '-', max_depth + 1);
    try source.append(testing.allocator, '1');

    const outcome = parse(scratch.allocator(), source.items);
    try testing.expectError(error.MaxDepthExceeded, outcome);
}
|
||||
464
src/browser/xpath/Tokenizer.zig
Normal file
464
src/browser/xpath/Tokenizer.zig
Normal file
@@ -0,0 +1,464 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 expression tokenizer.
|
||||
//!
|
||||
//! HTML-pragmatic behavior: lenient whitespace, case-preserving names,
|
||||
//! no escape processing in string literals (use the other quote type
|
||||
//! to embed), unknown characters silently skipped.
|
||||
//!
|
||||
//! The tokenizer borrows from the input slice and never allocates.
|
||||
//! `next()` always returns a token; `.eof` is terminal and idempotent.
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Tokenizer = @This();
|
||||
|
||||
/// One lexical unit produced by `next()`. Payload slices point into the
/// tokenizer's input.
pub const Token = union(enum) {
    /// Contents of a `'...'` or `"..."` literal with the quotes removed.
    /// No escape processing is done; the raw substring is kept
    /// (polyfill behavior).
    string: []const u8,

    /// Numeric literal such as `123`, `1.5`, `.5` or `5.`, stored as
    /// f64 to match the runtime number type.
    number: f64,

    /// Bare identifier: an element, function or axis name, one of the
    /// `or`/`and`/`div`/`mod` keywords, or a namespace-prefixed name
    /// (`prefix:local`, `prefix:*`). The colon and optional wildcard
    /// are kept verbatim so the parser can split them.
    name: []const u8,

    /// `/`
    slash,
    /// `//`
    double_slash,
    /// `.`
    dot,
    /// `..`
    double_dot,
    /// `@`
    at,
    /// `(`
    lparen,
    /// `)`
    rparen,
    /// `[`
    lbracket,
    /// `]`
    rbracket,
    /// `,`
    comma,
    /// `|`
    pipe,
    /// `=`
    eq,
    /// `!=`
    neq,
    /// `<`
    lt,
    /// `<=`
    lte,
    /// `>`
    gt,
    /// `>=`
    gte,
    /// `+`
    plus,
    /// `-`
    minus,
    /// `*`
    star,
    /// `$`
    dollar,
    /// `::`
    double_colon,
    /// End of input; returned on every call once the input is exhausted.
    eof,
};
|
||||
|
||||
/// Expression text being scanned. String and name tokens are slices of it.
input: []const u8,
/// Byte offset into `input` of the next unread byte.
position: usize = 0,
|
||||
|
||||
// True once the cursor has reached (or passed) the end of the input.
fn isEof(self: *const Tokenizer) bool {
    const remaining_exhausted = self.input.len <= self.position;
    return remaining_exhausted;
}
|
||||
|
||||
// Reports whether index `position + n` is inside the input, i.e.
// whether `byteAt(n)` is a safe read.
fn hasAtLeast(self: *const Tokenizer, n: usize) bool {
    const index = self.position + n;
    return index < self.input.len;
}
|
||||
|
||||
// Returns the byte `offset` positions ahead of the cursor. Performs no
// bounds check of its own; callers guard with `isEof` / `hasAtLeast`.
fn byteAt(self: *const Tokenizer, offset: usize) u8 {
    const index = self.position + offset;
    return self.input[index];
}
|
||||
|
||||
// Advances the cursor past any run of space, tab, LF or CR.
fn skipWhitespace(self: *Tokenizer) void {
    while (self.position < self.input.len) : (self.position += 1) {
        const ch = self.input[self.position];
        const is_blank = ch == ' ' or ch == '\t' or ch == '\n' or ch == '\r';
        if (!is_blank) break;
    }
}
|
||||
|
||||
// A name may begin with an ASCII letter or an underscore.
fn isNameStart(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}
|
||||
|
||||
// After the first character a name may also contain ASCII digits,
// `-` and `.`.
fn isNameContinue(c: u8) bool {
    return switch (c) {
        '0'...'9', '-', '.' => true,
        else => isNameStart(c),
    };
}
|
||||
|
||||
// Reads a string literal whose opening quote is at the cursor. `quote`
// is that opening character, and the literal runs to the next
// occurrence of it. If the input ends first, everything up to the end
// is returned and the cursor stops at EOF (polyfill parity).
fn consumeString(self: *Tokenizer, quote: u8) Token {
    const body_start = self.position + 1; // step over the opening quote
    const closing = std.mem.indexOfScalarPos(u8, self.input, body_start, quote);
    const body_end = closing orelse self.input.len;

    // Step over the closing quote only when one was actually found.
    self.position = if (closing != null) body_end + 1 else body_end;
    return .{ .string = self.input[body_start..body_end] };
}
|
||||
|
||||
// Reads a numeric literal starting at the cursor: optional integer
// digits, then optionally a `.` followed by optional fraction digits.
fn consumeNumber(self: *Tokenizer) Token {
    const begin = self.position;

    // Integer part (empty for the `.5` form).
    while (self.position < self.input.len and std.ascii.isDigit(self.input[self.position])) : (self.position += 1) {}

    const at_point = self.position < self.input.len and self.input[self.position] == '.';
    if (at_point) {
        self.position += 1;
        // Fraction part (empty for the `5.` form).
        while (self.position < self.input.len and std.ascii.isDigit(self.input[self.position])) : (self.position += 1) {}
    }

    // `next()` only calls this on a digit or on `.` followed by a digit,
    // so the text is `\d+(\.\d*)?` or `\.\d+`. The original author notes
    // both forms are accepted by parseFloat (checked against Zig 0.15.2),
    // hence the `unreachable`.
    const text = self.input[begin..self.position];
    const parsed = std.fmt.parseFloat(f64, text) catch unreachable;
    return .{ .number = parsed };
}
|
||||
|
||||
// Reads a name starting at the cursor, including an optional namespace
// part (`prefix:local` or `prefix:*`). The returned slice keeps the
// colon and wildcard verbatim.
fn consumeName(self: *Tokenizer) Token {
    const begin = self.position;
    while (self.position < self.input.len and isNameContinue(self.input[self.position])) : (self.position += 1) {}

    // A lone `:` introduces a namespace-qualified name. A `::` is the
    // axis separator and must be left for the next token.
    const rest = self.input[self.position..];
    const lone_colon = rest.len > 0 and rest[0] == ':' and (rest.len == 1 or rest[1] != ':');
    if (lone_colon) {
        self.position += 1; // the `:`
        const wildcard = self.position < self.input.len and self.input[self.position] == '*';
        if (wildcard) {
            self.position += 1;
        } else {
            while (self.position < self.input.len and isNameContinue(self.input[self.position])) : (self.position += 1) {}
        }
    }

    return .{ .name = self.input[begin..self.position] };
}
|
||||
|
||||
/// Returns the next token. Leading whitespace is skipped, `.eof` is
/// returned once the input is exhausted (and on every call after), and
/// bytes that start no known token are dropped without an error.
pub fn next(self: *Tokenizer) Token {
    while (true) {
        self.skipWhitespace();
        if (self.isEof()) return .eof;

        const first = self.byteAt(0);
        const second: ?u8 = if (self.hasAtLeast(1)) self.byteAt(1) else null;

        // String and numeric literals.
        switch (first) {
            '"', '\'' => return self.consumeString(first),
            '0'...'9' => return self.consumeNumber(),
            else => {},
        }
        // `.5` is a number; a `.` not followed by a digit is handled below.
        if (first == '.' and second != null and std.ascii.isDigit(second.?)) {
            return self.consumeNumber();
        }

        // Two-character operators take priority over their
        // one-character prefixes.
        if (second) |peek| {
            const pair: ?Token = switch (first) {
                '/' => if (peek == '/') Token.double_slash else null,
                ':' => if (peek == ':') Token.double_colon else null,
                '!' => if (peek == '=') Token.neq else null,
                '<' => if (peek == '=') Token.lte else null,
                '>' => if (peek == '=') Token.gte else null,
                '.' => if (peek == '.') Token.double_dot else null,
                else => null,
            };
            if (pair) |tok| {
                self.position += 2;
                return tok;
            }
        }

        // One-character operators and punctuation.
        const lone: ?Token = switch (first) {
            '(' => .lparen,
            ')' => .rparen,
            '[' => .lbracket,
            ']' => .rbracket,
            ',' => .comma,
            '|' => .pipe,
            '=' => .eq,
            '<' => .lt,
            '>' => .gt,
            '+' => .plus,
            '-' => .minus,
            '*' => .star,
            '$' => .dollar,
            '/' => .slash,
            '@' => .at,
            '.' => .dot,
            else => null,
        };
        if (lone) |tok| {
            self.position += 1;
            return tok;
        }

        if (isNameStart(first)) return self.consumeName();

        // Polyfill parity (decision #2): an unrecognized byte is
        // skipped silently and scanning resumes at the next one.
        self.position += 1;
    }
}
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
/// Test helper: tokenizes `input` and checks that the tokens come out
/// in the order given by `expected`. Only `expected.len` tokens are
/// pulled, so callers end the list with `.eof` to pin the end of input.
fn expectTokens(input: []const u8, expected: []const Token) !void {
    var lexer = Tokenizer{ .input = input };
    var index: usize = 0;
    while (index < expected.len) : (index += 1) {
        const actual = lexer.next();
        try testing.expectEqualDeep(expected[index], actual);
    }
}
|
||||
|
||||
// With nothing to scan, the first token is already `.eof`.
test "XPath.Tokenizer: empty input emits EOF" {
    const expected = [_]Token{.eof};
    try expectTokens("", &expected);
}
|
||||
|
||||
// Spaces, tabs and line breaks alone produce no tokens before `.eof`.
test "XPath.Tokenizer: only whitespace emits EOF" {
    const expected = [_]Token{.eof};
    try expectTokens(" \t\n\r ", &expected);
}
|
||||
|
||||
// Calling `next()` repeatedly after the end keeps returning `.eof`.
test "XPath.Tokenizer: EOF idempotent past end" {
    var lexer = Tokenizer{ .input = "" };
    var calls: usize = 0;
    while (calls < 3) : (calls += 1) {
        try testing.expectEqual(Token.eof, lexer.next());
    }
}
|
||||
|
||||
// Every one-character operator, back to back, in declaration order.
test "XPath.Tokenizer: single-char operators" {
    const expected = [_]Token{
        .lparen,
        .rparen,
        .lbracket,
        .rbracket,
        .comma,
        .pipe,
        .eq,
        .lt,
        .gt,
        .plus,
        .minus,
        .star,
        .dollar,
        .slash,
        .at,
        .dot,
        .eof,
    };
    try expectTokens("()[],|=<>+-*$/@.", &expected);
}
|
||||
|
||||
// Every two-character operator, separated by spaces.
test "XPath.Tokenizer: two-char operators" {
    const expected = [_]Token{
        .double_slash,
        .double_colon,
        .neq,
        .lte,
        .gte,
        .double_dot,
        .eof,
    };
    try expectTokens("// :: != <= >= ..", &expected);
}
|
||||
|
||||
// `/` vs `//` and `<` vs `<=` are told apart by the following byte.
test "XPath.Tokenizer: two-char vs single-char disambiguation" {
    const a: Token = .{ .name = "a" };
    const b: Token = .{ .name = "b" };

    try expectTokens("/a/b", &.{ .slash, a, .slash, b, .eof });
    try expectTokens("//a", &.{ .double_slash, a, .eof });
    try expectTokens("a<b", &.{ a, .lt, b, .eof });
    try expectTokens("a<=b", &.{ a, .lte, b, .eof });
}
|
||||
|
||||
// A double-quoted literal yields its contents without the quotes.
test "XPath.Tokenizer: string literal double quote" {
    const expected = [_]Token{ .{ .string = "hello world" }, .eof };
    try expectTokens("\"hello world\"", &expected);
}
|
||||
|
||||
// A single-quoted literal yields its contents without the quotes.
test "XPath.Tokenizer: string literal single quote" {
    const expected = [_]Token{ .{ .string = "hello world" }, .eof };
    try expectTokens("'hello world'", &expected);
}
|
||||
|
||||
// Only the opening quote character terminates a literal, so the other
// kind of quote may appear inside it.
test "XPath.Tokenizer: string embeds the other quote type" {
    const apostrophe_inside = [_]Token{ .{ .string = "it's" }, .eof };
    try expectTokens("\"it's\"", &apostrophe_inside);

    const double_quotes_inside = [_]Token{ .{ .string = "say \"hi\"" }, .eof };
    try expectTokens("'say \"hi\"'", &double_quotes_inside);
}
|
||||
|
||||
// Adjacent quotes produce an empty string token for both quote kinds.
test "XPath.Tokenizer: empty string literal" {
    const expected = [_]Token{ .{ .string = "" }, .eof };
    try expectTokens("''", &expected);
    try expectTokens("\"\"", &expected);
}
|
||||
|
||||
// A literal with no closing quote is not an error: the text up to the
// end of input becomes the token.
test "XPath.Tokenizer: unterminated string emits partial — polyfill parity" {
    const single_quoted = [_]Token{ .{ .string = "unterminated" }, .eof };
    try expectTokens("'unterminated", &single_quoted);

    const double_quoted = [_]Token{ .{ .string = "oops" }, .eof };
    try expectTokens("\"oops", &double_quoted);
}
|
||||
|
||||
// Plain digit runs become number tokens.
test "XPath.Tokenizer: integer literals" {
    const zero = [_]Token{ .{ .number = 0 }, .eof };
    try expectTokens("0", &zero);

    const two_digits = [_]Token{ .{ .number = 42 }, .eof };
    try expectTokens("42", &two_digits);

    const five_digits = [_]Token{ .{ .number = 12345 }, .eof };
    try expectTokens("12345", &five_digits);
}
|
||||
|
||||
// Digits on both sides of the decimal point.
test "XPath.Tokenizer: float literals" {
    const pi_ish = [_]Token{ .{ .number = 3.14 }, .eof };
    try expectTokens("3.14", &pi_ish);

    const half = [_]Token{ .{ .number = 0.5 }, .eof };
    try expectTokens("0.5", &half);
}
|
||||
|
||||
// A `.` immediately followed by a digit starts a number, not a dot token.
test "XPath.Tokenizer: leading-dot float (.5)" {
    const half = [_]Token{ .{ .number = 0.5 }, .eof };
    try expectTokens(".5", &half);

    const quarter = [_]Token{ .{ .number = 0.25 }, .eof };
    try expectTokens(".25", &quarter);
}
|
||||
|
||||
// A trailing `.` with no fraction digits is absorbed into the number.
test "XPath.Tokenizer: trailing-dot float (5.)" {
    const expected = [_]Token{ .{ .number = 5 }, .eof };
    try expectTokens("5.", &expected);
}
|
||||
|
||||
// Leading zeros do not switch the literal to another radix.
test "XPath.Tokenizer: leading zeros are decimal, not octal" {
    const seven = [_]Token{ .{ .number = 7 }, .eof };
    try expectTokens("007", &seven);

    const forty_two = [_]Token{ .{ .number = 42 }, .eof };
    try expectTokens("0042", &forty_two);
}
|
||||
|
||||
test "XPath.Tokenizer: multi-digit fraction parses with parseFloat precision" {
    // Pins that the literal text goes through parseFloat (the polyfill
    // uses Number()). An earlier hand-rolled `place *= 0.1` accumulator
    // lost precision on long fractions.
    const long_fraction = [_]Token{ .{ .number = 0.123456789 }, .eof };
    try expectTokens("0.123456789", &long_fraction);

    const mixed = [_]Token{ .{ .number = 123.456 }, .eof };
    try expectTokens("123.456", &mixed);
}
|
||||
|
||||
// A `.` only begins a number when a digit follows it directly.
test "XPath.Tokenizer: dot followed by non-digit emits dot token" {
    const dot_then_name = [_]Token{ .dot, .{ .name = "x" }, .eof };
    try expectTokens(".x", &dot_then_name);

    const dot_alone = [_]Token{ .dot, .eof };
    try expectTokens(".", &dot_alone);

    // The space keeps the digit from attaching to the dot.
    const dot_then_number = [_]Token{ .dot, .{ .number = 3 }, .eof };
    try expectTokens(". 3", &dot_then_number);
}
|
||||
|
||||
// Names may start with a letter or underscore, and case is preserved.
test "XPath.Tokenizer: bare identifier" {
    const inputs = [_][]const u8{ "foo", "_x", "MixedCase" };
    for (inputs) |text| {
        try expectTokens(text, &.{ .{ .name = text }, .eof });
    }
}
|
||||
|
||||
// Digits, `-` and `.` are allowed after the first character of a name.
test "XPath.Tokenizer: identifier with digits, dashes, dots" {
    const inputs = [_][]const u8{ "foo-bar", "foo.bar", "a1b2" };
    for (inputs) |text| {
        try expectTokens(text, &.{ .{ .name = text }, .eof });
    }
}
|
||||
|
||||
// `prefix:local` and `prefix:*` each come out as a single name token.
test "XPath.Tokenizer: namespace-prefixed name" {
    const qualified = [_]Token{ .{ .name = "xhtml:div" }, .eof };
    try expectTokens("xhtml:div", &qualified);

    const prefixed_wildcard = [_]Token{ .{ .name = "svg:*" }, .eof };
    try expectTokens("svg:*", &prefixed_wildcard);
}
|
||||
|
||||
// `::` after a name is the axis separator, not a namespace colon.
test "XPath.Tokenizer: name followed by `::` keeps the colon for the axis token" {
    const expected = [_]Token{
        .{ .name = "child" },
        .double_colon,
        .{ .name = "node" },
        .eof,
    };
    try expectTokens("child::node", &expected);
}
|
||||
|
||||
test "XPath.Tokenizer: name immediately followed by `(` is two tokens" {
    // The tokenizer does not recognize function calls; the parser
    // does that from the name / lparen sequence.
    const expected = [_]Token{
        .{ .name = "count" },
        .lparen,
        .rparen,
        .eof,
    };
    try expectTokens("count()", &expected);
}
|
||||
|
||||
// Operator keywords get no special token; they arrive as names.
test "XPath.Tokenizer: keywords or/and/div/mod tokenize as plain names" {
    const or_expr = [_]Token{
        .{ .name = "a" },
        .{ .name = "or" },
        .{ .name = "b" },
        .eof,
    };
    try expectTokens("a or b", &or_expr);

    const div_expr = [_]Token{
        .{ .number = 3 },
        .{ .name = "div" },
        .{ .number = 4 },
        .eof,
    };
    try expectTokens("3 div 4", &div_expr);
}
|
||||
|
||||
// A byte that starts no token is dropped. In the middle of a name it
// also ends that name.
test "XPath.Tokenizer: unknown character silently skipped" {
    const leading = [_]Token{ .{ .name = "foo" }, .eof };
    try expectTokens("?foo", &leading);

    const embedded = [_]Token{ .{ .name = "foo" }, .{ .name = "bar" }, .eof };
    try expectTokens("foo?bar", &embedded);
}
|
||||
|
||||
// End-to-end token stream for a typical abbreviated path with an
// attribute predicate and a positional predicate.
test "XPath.Tokenizer: representative path expression" {
    const expected = [_]Token{
        .double_slash,
        .{ .name = "div" },
        .lbracket,
        .at,
        .{ .name = "class" },
        .eq,
        .{ .string = "x" },
        .rbracket,
        .slash,
        .{ .name = "p" },
        .lbracket,
        .{ .number = 2 },
        .rbracket,
        .eof,
    };
    try expectTokens("//div[@class='x']/p[2]", &expected);
}
|
||||
|
||||
// End-to-end token stream for explicit axes, a wildcard node test and
// an arithmetic predicate. Hyphenated axis names stay single tokens,
// while the `-` before `1` is a separate minus.
test "XPath.Tokenizer: representative axis + predicate expression" {
    const source = "ancestor-or-self::section/following-sibling::*[position()<=last()-1]";
    const expected = [_]Token{
        .{ .name = "ancestor-or-self" },
        .double_colon,
        .{ .name = "section" },
        .slash,
        .{ .name = "following-sibling" },
        .double_colon,
        .star,
        .lbracket,
        .{ .name = "position" },
        .lparen,
        .rparen,
        .lte,
        .{ .name = "last" },
        .lparen,
        .rparen,
        .minus,
        .{ .number = 1 },
        .rbracket,
        .eof,
    };
    try expectTokens(source, &expected);
}
|
||||
|
||||
// `..` is one token, distinct from two consecutive dots.
test "XPath.Tokenizer: parent-axis abbreviation" {
    const expected = [_]Token{
        .double_dot,
        .slash,
        .{ .name = "foo" },
        .eof,
    };
    try expectTokens("../foo", &expected);
}
|
||||
|
||||
// A parenthesized path followed by a positional predicate.
test "XPath.Tokenizer: filter expression with predicate" {
    const expected = [_]Token{
        .lparen,
        .double_slash,
        .{ .name = "a" },
        .rparen,
        .lbracket,
        .{ .number = 1 },
        .rbracket,
        .eof,
    };
    try expectTokens("(//a)[1]", &expected);
}
|
||||
|
||||
// `$` is its own token; the variable name follows as a plain name.
test "XPath.Tokenizer: variable reference" {
    const expected = [_]Token{
        .dollar,
        .{ .name = "x" },
        .plus,
        .{ .number = 1 },
        .eof,
    };
    try expectTokens("$x + 1", &expected);
}
|
||||
133
src/browser/xpath/ast.zig
Normal file
133
src/browser/xpath/ast.zig
Normal file
@@ -0,0 +1,133 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 AST.
|
||||
//!
|
||||
//! Slices and pointers are arena-owned by the Parser; the AST has no
|
||||
//! destructor.
|
||||
|
||||
/// Root node type of a parsed XPath expression.
pub const Expr = union(enum) {
    /// Location path, absolute or relative, e.g. `/foo/bar`, `//x`,
    /// `foo/bar`.
    path: Path,

    /// A filter expression continued by location steps, e.g. `(//a)/b`
    /// or `(expr)//c`.
    filter_path: FilterPath,

    /// A filter expression with exactly one predicate, `(expr)[n]`.
    /// Several predicates are represented by nesting:
    /// `(e)[1][2]` becomes filter(filter(e, 1), 2).
    filter: Filter,

    /// Binary operation; see `BinOpKind` for the operators.
    binop: BinOp,

    /// Unary minus applied to the pointed-to operand. There is no unary
    /// `+` (the polyfill has none).
    neg: *Expr,

    /// String literal with the surrounding quotes removed.
    literal: []const u8,

    /// Numeric literal already converted to f64.
    number: f64,

    /// Variable reference, stored without the leading `$`. Per
    /// decision #3 the evaluator always yields the empty string for it.
    var_ref: []const u8,

    /// Function call with its argument expressions.
    fn_call: FnCall,
};
|
||||
|
||||
/// A location path: a sequence of steps, optionally anchored at the root.
pub const Path = struct {
    /// True for absolute paths (`/foo`, `//x`); false for relative ones.
    absolute: bool,
    /// The steps of the path, in source order.
    steps: []const Step,
};
|
||||
|
||||
/// Payload of `Expr.filter_path`: a filter expression followed by a
/// location-path tail, as in `(//a)/b`.
pub const FilterPath = struct {
    /// The expression the tail steps are applied to.
    filter: *Expr,
    /// The steps following the filter expression.
    steps: []const Step,
};
|
||||
|
||||
/// Payload of `Expr.filter`: an expression narrowed by one predicate,
/// as in `(expr)[n]`.
pub const Filter = struct {
    /// The expression being filtered.
    expr: *Expr,
    /// The expression inside the brackets.
    predicate: *Expr,
};
|
||||
|
||||
/// Payload of `Expr.binop`: an operator and its two operands.
pub const BinOp = struct {
    /// Which operator this node applies.
    op: BinOpKind,
    /// Left-hand operand.
    left: *Expr,
    /// Right-hand operand.
    right: *Expr,
};
|
||||
|
||||
/// Operators that can appear in a `BinOp`. `or_`, `and_` and `union_`
/// carry a trailing underscore because the bare words are Zig keywords.
pub const BinOpKind = enum {
    // Logical.
    or_,
    and_,

    // Equality and relational.
    eq,
    neq,
    lt,
    gt,
    lte,
    gte,

    // Arithmetic.
    add,
    sub,
    mul,
    div,
    mod,

    // Node-set union.
    union_,
};
|
||||
|
||||
/// Payload of `Expr.fn_call`.
pub const FnCall = struct {
    /// Function name as written in the expression.
    name: []const u8,
    /// Argument expressions, in call order.
    args: []const *Expr,
};
|
||||
|
||||
/// One step of a location path: an axis, a node test, and zero or more
/// predicates.
pub const Step = struct {
    /// Axis the step moves along.
    axis: Axis,
    /// Name or node-type test applied on that axis.
    node_test: NodeTest,
    /// Predicate expressions attached to the step, in source order.
    predicates: []const *Expr,
};
|
||||
|
||||
/// Axis of a location step. Variant names follow the XPath axis names
/// with `-` replaced by `_`.
pub const Axis = enum {
    child,
    descendant,
    descendant_or_self,
    self,
    parent,
    ancestor,
    ancestor_or_self,
    following_sibling,
    preceding_sibling,
    following,
    preceding,
    attribute,
    namespace,
    /// Catch-all for axis names the parser does not recognize (polyfill
    /// parity, decision #2). The evaluator returns an empty node-set
    /// for it.
    unknown,
};
|
||||
|
||||
/// The test a step applies to each candidate node.
pub const NodeTest = union(enum) {
    /// Name test for an element or attribute; `"*"` is the wildcard.
    /// Namespaced forms (`prefix:*`, `prefix:local`) are kept verbatim.
    /// The evaluator does not split them, so they are compared to the
    /// node name literally with `mem.eql`, in line with the
    /// `namespace::` axis stub (decision #3).
    /// TODO: real namespace support if the polyfill ever drops the stub.
    name: []const u8,

    /// Node-type test: `node()`, `text()`, `comment()` or
    /// `processing-instruction()`. An optional target literal, as in
    /// `processing-instruction("foo")`, is consumed by the parser but
    /// not recorded (decision #3 stub).
    type_test: TypeTest,
};
|
||||
|
||||
/// Node-type tests usable in a step.
pub const TypeTest = enum {
    /// `node()`
    node,
    /// `text()`
    text,
    /// `comment()`
    comment,
    /// `processing-instruction()`
    processing_instruction,
};
|
||||
630
src/browser/xpath/functions.zig
Normal file
630
src/browser/xpath/functions.zig
Normal file
@@ -0,0 +1,630 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 core function library — 25 functions covering the spec's
|
||||
//! core function set. `position()` and `last()` live in
|
||||
//! `Evaluator.evalFnCall` because they need the `(pos, size)` closure
|
||||
//! that this module never sees.
|
||||
//!
|
||||
//! Args are pre-evaluated by the caller (`Evaluator.evalFnCall`). Eager
|
||||
//! evaluation is fine here — short-circuit operators (`or`/`and`) are
|
||||
//! binops, not function calls, so laziness isn't required. The
|
||||
//! pre-evaluation contract also keeps functions.zig free of a circular
|
||||
//! import on Evaluator.zig.
|
||||
//!
|
||||
//! Stubs per decision #3:
|
||||
//! - `lang(string)` → always false
|
||||
//! - `namespace-uri(...)` → always ""
|
||||
//! - `name`/`local-name` → lowercased (HTML pragmatism)
|
||||
//!
|
||||
//! Allocations land in the caller's per-evaluation arena.
|
||||
|
||||
const std = @import("std");
|
||||
const lp = @import("lightpanda");
|
||||
|
||||
const Node = @import("../webapi/Node.zig");
|
||||
|
||||
const result = @import("result.zig");
|
||||
|
||||
const Frame = lp.Frame;
|
||||
const Element = Node.Element;
|
||||
const Document = Node.Document;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
/// Errors that `call` and the function implementations in this module
/// may return.
pub const Error = error{
    /// Arena allocation failed.
    OutOfMemory,
    /// A write into an in-memory buffer failed.
    WriteFailed,
    StringTooLarge,
    /// `call` was given a name that is not in the core library.
    UnknownFunction,
};
|
||||
|
||||
/// Runs the core-library function called `name` on already-evaluated
/// `args`.
///
/// `ctx` is the context node, used by functions that default to it when
/// called without arguments. `arena` receives any allocations and
/// `frame` is forwarded to `id()`. `position()` and `last()` are
/// handled by the Evaluator before it calls this, so an unmatched name
/// yields `error.UnknownFunction`.
pub fn call(
    arena: Allocator,
    name: []const u8,
    args: []const result.Result,
    ctx: *Node,
    frame: *Frame,
) Error!result.Result {
    // Field names are the XPath function names verbatim, so lookup is a
    // plain string-to-enum conversion.
    const Builtin = enum {
        count,
        id,
        @"local-name",
        name,
        @"namespace-uri",
        string,
        concat,
        @"starts-with",
        contains,
        @"substring-before",
        @"substring-after",
        substring,
        @"string-length",
        @"normalize-space",
        translate,
        boolean,
        not,
        @"true",
        @"false",
        lang,
        number,
        sum,
        floor,
        ceiling,
        round,
    };

    const builtin = std.meta.stringToEnum(Builtin, name) orelse return error.UnknownFunction;

    switch (builtin) {
        // -- Node-set --
        .count => return .{ .number = countFn(args) },
        .id => return idFn(arena, args, ctx, frame),
        .@"local-name" => return .{ .string = try localNameFn(arena, args, ctx) },
        .name => return .{ .string = try nameFn(arena, args, ctx) },
        // Stub (decision #3): always the empty string.
        .@"namespace-uri" => return .{ .string = "" },

        // -- String --
        .string => return .{ .string = try stringFn(arena, args, ctx) },
        .concat => return .{ .string = try concatFn(arena, args) },
        .@"starts-with" => return .{ .boolean = try startsWithFn(arena, args) },
        .contains => return .{ .boolean = try containsFn(arena, args) },
        .@"substring-before" => return .{ .string = try substringBeforeFn(arena, args) },
        .@"substring-after" => return .{ .string = try substringAfterFn(arena, args) },
        .substring => return .{ .string = try substringFn(arena, args) },
        .@"string-length" => return .{ .number = try stringLengthFn(arena, args, ctx) },
        .@"normalize-space" => return .{ .string = try normalizeSpaceFn(arena, args, ctx) },
        .translate => return .{ .string = try translateFn(arena, args) },

        // -- Boolean --
        .boolean => {
            if (args.len == 0) return .{ .boolean = false };
            return .{ .boolean = result.toBoolean(args[0]) };
        },
        .not => {
            if (args.len == 0) return .{ .boolean = true };
            return .{ .boolean = !result.toBoolean(args[0]) };
        },
        .@"true" => return .{ .boolean = true },
        .@"false" => return .{ .boolean = false },
        // Stub (decision #3): always false.
        .lang => return .{ .boolean = false },

        // -- Number --
        .number => return .{ .number = try numberFn(arena, args, ctx) },
        .sum => return .{ .number = try sumFn(arena, args) },
        .floor, .ceiling, .round => {
            // With no argument the result is NaN.
            if (args.len == 0) return .{ .number = std.math.nan(f64) };
            const operand = try result.toNumber(arena, args[0]);
            const rounded = switch (builtin) {
                .floor => std.math.floor(operand),
                .ceiling => std.math.ceil(operand),
                .round => roundHalfToPosInf(operand),
                else => unreachable,
            };
            return .{ .number = rounded };
        },
    }
}
|
||||
|
||||
// Byte-wise equality of two strings; shorthand for `std.mem.eql(u8, ...)`.
inline fn eql(a: []const u8, b: []const u8) bool {
    const same = std.mem.eql(u8, a, b);
    return same;
}
|
||||
|
||||
// ----- node-set fns -----
|
||||
|
||||
// `count(node-set)`: number of nodes in the first argument. A missing
// argument or one that is not a node-set counts as 0.
fn countFn(args: []const result.Result) f64 {
    if (args.len == 0) return 0;
    return switch (args[0]) {
        .node_set => |nodes| @as(f64, @floatFromInt(nodes.len)),
        else => 0,
    };
}
|
||||
|
||||
// `id(object)`: looks up elements by id.
//
// The argument is reduced to one whitespace-separated list of ids, each
// id is looked up with `getElementById`, and the matches are returned
// as a node-set with duplicates removed, in first-seen order. No
// argument, or a context node with no reachable document, gives an
// empty node-set.
fn idFn(arena: Allocator, args: []const result.Result, ctx: *Node, frame: *Frame) Error!result.Result {
    if (args.len == 0) return .{ .node_set = &.{} };

    // Polyfill behavior: a node-set argument contributes the string
    // value of each node joined by single spaces; any other argument
    // is converted with the normal string conversion.
    const id_list: []const u8 = if (args[0] == .node_set) joined: {
        var out = std.Io.Writer.Allocating.init(arena);
        for (args[0].node_set, 0..) |member, index| {
            if (index > 0) try out.writer.writeByte(' ');
            const text = try result.stringValueOf(arena, member);
            try out.writer.writeAll(text);
        }
        break :joined out.written();
    } else try result.toString(arena, args[0]);

    // Equivalent of `ctx.ownerDocument || ctx`: fall back to the
    // context node itself when it is the document.
    const doc = ctx.ownerDocument(frame) orelse (ctx.is(Document) orelse return .{ .node_set = &.{} });

    // The array hash map de-duplicates and keeps insertion order.
    var found: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
    var ids = std.mem.tokenizeAny(u8, id_list, &std.ascii.whitespace);
    while (ids.next()) |id| {
        const element = doc.getElementById(id, frame) orelse continue;
        try found.put(arena, element.asNode(), {});
    }
    return .{ .node_set = found.keys() };
}
|
||||
|
||||
/// XPath `local-name()`: local part of the name of the first node in the
/// argument node-set, or of the context node when no argument is given.
/// Returns "" when there is no such node.
fn localNameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const target = firstNodeOrCtx(args, ctx) orelse return "";

    if (target.is(Element)) |element| {
        // `getLocalName` hands back a slice into `_tag_name` (lowercase,
        // namespace prefix stripped). It outlives the per-evaluation
        // arena, so it is borrowed rather than copied.
        return element.getLocalName();
    }

    // Non-element nodes: lowercase copy of the node name, arena-owned.
    var scratch: [256]u8 = undefined;
    const raw_name = target.getNodeName(&scratch);
    return std.ascii.allocLowerString(arena, raw_name);
}
|
||||
|
||||
/// XPath `name()`: qualified name of the first node in the argument
/// node-set, or of the context node when no argument is given.
/// Returns "" when there is no such node.
fn nameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const target = firstNodeOrCtx(args, ctx) orelse return "";

    if (target.is(Element)) |element| {
        // Only differs from `local-name` on namespaced elements: `name`
        // keeps the prefix (`ns:foo`), `local-name` strips it (`foo`).
        return element.getTagNameLower();
    }

    // Non-element nodes: lowercase copy of the node name, arena-owned.
    var scratch: [256]u8 = undefined;
    const raw_name = target.getNodeName(&scratch);
    return std.ascii.allocLowerString(arena, raw_name);
}
|
||||
|
||||
/// Picks the node a name-style function operates on: the context node
/// when no argument was passed, otherwise the first node of the argument
/// node-set. Returns null for a non-node-set or empty node-set argument.
fn firstNodeOrCtx(args: []const result.Result, ctx: *Node) ?*Node {
    if (args.len == 0) return ctx;
    return switch (args[0]) {
        .node_set => |nodes| if (nodes.len > 0) nodes[0] else null,
        else => null,
    };
}
|
||||
|
||||
// ----- string fns -----
|
||||
|
||||
/// XPath `string()`: string coercion of the first argument, or the
/// string-value of the context node when called without arguments.
fn stringFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    if (args.len > 0) {
        return try result.toString(arena, args[0]);
    }
    return try result.stringValueOf(arena, ctx);
}
|
||||
|
||||
/// XPath `concat()`: string-coerces every argument and appends them in
/// order into one arena-owned buffer. No arguments yields "".
fn concatFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    var out = std.Io.Writer.Allocating.init(arena);
    var i: usize = 0;
    while (i < args.len) : (i += 1) {
        const piece = try result.toString(arena, args[i]);
        try out.writer.writeAll(piece);
    }
    return out.written();
}
|
||||
|
||||
/// XPath `starts-with(a, b)`: true when the string form of `a` begins
/// with the string form of `b` (byte comparison). Fewer than two
/// arguments yields false.
fn startsWithFn(arena: Allocator, args: []const result.Result) Error!bool {
    if (args.len < 2) return false;
    const haystack = try result.toString(arena, args[0]);
    const prefix = try result.toString(arena, args[1]);
    if (prefix.len > haystack.len) return false;
    return std.mem.eql(u8, haystack[0..prefix.len], prefix);
}
|
||||
|
||||
/// XPath `contains(a, b)`: true when the string form of `b` occurs
/// anywhere in the string form of `a` (byte comparison). Fewer than two
/// arguments yields false.
fn containsFn(arena: Allocator, args: []const result.Result) Error!bool {
    if (args.len < 2) return false;
    const haystack = try result.toString(arena, args[0]);
    const needle = try result.toString(arena, args[1]);
    return if (std.mem.indexOf(u8, haystack, needle)) |_| true else false;
}
|
||||
|
||||
/// XPath `substring-before(a, b)`: the part of `a` that precedes the
/// first occurrence of `b`. Returns "" when `b` does not occur or when
/// fewer than two arguments are given. The result borrows from `a`.
fn substringBeforeFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const haystack = try result.toString(arena, args[0]);
    const needle = try result.toString(arena, args[1]);
    const at = std.mem.indexOf(u8, haystack, needle) orelse return "";
    return haystack[0..at];
}
|
||||
|
||||
/// XPath `substring-after(a, b)`: the part of `a` that follows the first
/// occurrence of `b`. Returns "" when `b` does not occur or when fewer
/// than two arguments are given. The result borrows from `a`.
fn substringAfterFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const haystack = try result.toString(arena, args[0]);
    const needle = try result.toString(arena, args[1]);
    const at = std.mem.indexOf(u8, haystack, needle) orelse return "";
    const tail_start = at + needle.len;
    return haystack[tail_start..];
}
|
||||
|
||||
/// XPath `substring(s, start[, length])`.
///
/// Positions are 1-based; `start` and `length` are rounded with
/// `roundHalfToPosInf` before use. The selected range is clamped to the
/// string, and a NaN start, NaN length or NaN end position yields "".
/// Offsets are byte offsets into the UTF-8 string; the result borrows
/// from the coerced first argument.
fn substringFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const text = try result.toString(arena, args[0]);

    const raw_start = try result.toNumber(arena, args[1]);
    if (std.math.isNan(raw_start)) return "";
    const first_pos = roundHalfToPosInf(raw_start);

    const text_len: f64 = @floatFromInt(text.len);
    // Zero-based begin offset, never below the start of the string.
    const begin_f = @max(first_pos - 1, 0);

    if (args.len < 3) {
        // Two-argument form: everything from `start` to the end.
        if (begin_f >= text_len) return "";
        const begin: usize = @intFromFloat(begin_f);
        return text[begin..];
    }

    const raw_len = try result.toNumber(arena, args[2]);
    if (std.math.isNan(raw_len)) return "";
    const count = roundHalfToPosInf(raw_len);

    const end_unclamped = first_pos - 1 + count;
    // -inf + inf is NaN; @intFromFloat(NaN) is illegal behavior.
    if (std.math.isNan(end_unclamped)) return "";

    const end_f = @min(end_unclamped, text_len);
    if (begin_f >= end_f) return "";

    const begin: usize = @intFromFloat(begin_f);
    const end: usize = @intFromFloat(end_f);
    return text[begin..end];
}
|
||||
|
||||
/// XPath `string-length()`: length of the string form of the first
/// argument, or of the context node's string-value when no argument is
/// given.
///
/// The length is counted in UTF-8 bytes, whereas the polyfill returns
/// UTF-16 code units. Both agree on ASCII (the gem's 91-case battery is
/// ASCII-only). See .claude/skills/xpath-port/NOTES.md for the
/// divergence rationale.
fn stringLengthFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
    const text = switch (args.len) {
        0 => try result.stringValueOf(arena, ctx),
        else => try result.toString(arena, args[0]),
    };
    const byte_count: f64 = @floatFromInt(text.len);
    return byte_count;
}
|
||||
|
||||
/// XPath `normalize-space()`: strips leading and trailing ASCII
/// whitespace and collapses every inner whitespace run to one space.
/// Operates on the first argument, or on the context node's string-value
/// when no argument is given.
fn normalizeSpaceFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const text = switch (args.len) {
        0 => try result.stringValueOf(arena, ctx),
        else => try result.toString(arena, args[0]),
    };

    // Tokenizing on whitespace both trims the ends and skips the runs;
    // the words then only need to be re-joined with single spaces.
    var words = std.mem.tokenizeAny(u8, text, &std.ascii.whitespace);
    const first_word = words.next() orelse return "";

    var out = std.Io.Writer.Allocating.init(arena);
    try out.writer.writeAll(first_word);
    while (words.next()) |word| {
        try out.writer.writeByte(' ');
        try out.writer.writeAll(word);
    }
    return out.written();
}
|
||||
|
||||
/// XPath `translate(s, from, to)`: maps each byte of `s` found in `from`
/// to the byte at the same index in `to`. Bytes whose index in `from`
/// has no counterpart in `to` are dropped; all other bytes are copied
/// through. Fewer than three arguments yields "".
fn translateFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 3) return "";
    const input = try result.toString(arena, args[0]);
    const from = try result.toString(arena, args[1]);
    const to = try result.toString(arena, args[2]);

    var out = std.Io.Writer.Allocating.init(arena);
    for (input) |byte| {
        // The first occurrence in `from` decides the mapping.
        const slot = std.mem.indexOfScalar(u8, from, byte) orelse {
            try out.writer.writeByte(byte);
            continue;
        };
        // Chars in `from` past `to.len` are deleted (no copy).
        if (slot < to.len) try out.writer.writeByte(to[slot]);
    }
    return out.written();
}
|
||||
|
||||
// ----- number fns -----
|
||||
|
||||
/// XPath `number()`: numeric coercion of the first argument, or of the
/// context node's string-value when called without arguments.
fn numberFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
    if (args.len > 0) {
        return try result.toNumber(arena, args[0]);
    }
    const ctx_text = try result.stringValueOf(arena, ctx);
    return result.stringToNumber(ctx_text);
}
|
||||
|
||||
/// XPath `sum()`: adds up the numeric value of each node's string-value
/// in the argument node-set. A missing or non-node-set argument yields
/// NaN; an empty node-set yields 0.
fn sumFn(arena: Allocator, args: []const result.Result) Error!f64 {
    if (args.len == 0) return std.math.nan(f64);
    const nodes = switch (args[0]) {
        .node_set => |ns| ns,
        else => return std.math.nan(f64),
    };

    var acc: f64 = 0;
    for (nodes) |node| {
        const text = try result.stringValueOf(arena, node);
        acc += result.stringToNumber(text);
    }
    return acc;
}
|
||||
|
||||
/// Round to the nearest integer, with ties going toward positive
/// infinity. Matches JS `Math.round` (the polyfill calls it for both
/// `round()` and `substring()`):
///   round(0.5) = 1     round(-0.5) = 0
///   round(1.5) = 2     round(-1.5) = -1
/// Diverges from Zig's `@round` (away from zero): `@round(-0.5) = -1`.
///
/// NaN and ±Infinity are returned unchanged.
fn roundHalfToPosInf(n: f64) f64 {
    // `isFinite` is already false for NaN, so one check covers both.
    if (!std.math.isFinite(n)) return n;

    // Decide on the fractional part instead of computing `floor(n + 0.5)`:
    // the addition itself rounds, which turns 0.49999999999999994 into 1
    // and bumps odd integers >= 2^52 to the next even integer. The
    // subtraction below is exact for every finite `n`.
    const lower = std.math.floor(n);
    return if (n - lower >= 0.5) lower + 1 else lower;
}
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Tests — pure-logic only. Functions that need a real DOM (id, name,
|
||||
// local-name, string with element ctx, sum, count of node-set, etc.)
|
||||
// are exercised via Phase 9 HTML fixtures in tests/xpath/.
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const testing = std.testing;
|
||||
const Tokenizer = @import("Tokenizer.zig");
|
||||
const Parser = @import("Parser.zig");
|
||||
const Evaluator = @import("Evaluator.zig");
|
||||
|
||||
/// Test helper: parses `src` as an XPath expression and evaluates it
/// without a real DOM. Only usable for expressions whose result does not
/// depend on any node (literals, arithmetic, scalar function calls).
fn evalScalar(a: Allocator, src: []const u8) !result.Result {
    const expr = try Parser.parse(a, src);
    // Synthetic Frame/Node pointers — the public `evaluate` entry only
    // touches the Frame for path/axis evaluation. Pure-scalar expressions
    // (arithmetic, function calls returning scalars) never deref it.
    // The addresses are arbitrary non-null placeholders.
    return Evaluator.evaluate(a, expr, @ptrFromInt(0x2000), @ptrFromInt(0x1000));
}
|
||||
|
||||
test "Functions: count() of non-node-set returns 0" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    // A string argument is not a node-set, so count() falls back to 0.
    const got = try evalScalar(arena_state.allocator(), "count('hello')");
    try testing.expect(got == .number);
    try testing.expectEqual(@as(f64, 0), got.number);
}
|
||||
|
||||
test "Functions: string() on scalar coerces" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected string result }.
    inline for (.{
        .{ "string(42)", "42" },
        .{ "string(3.14)", "3.14" },
        .{ "string(true())", "true" },
        .{ "string(false())", "false" },
        .{ "string('hello')", "hello" },
        .{ "string(0)", "0" },
        .{ "string(-1)", "-1" },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(tc[1], got.string);
    }
}
|
||||
|
||||
test "Functions: concat() variadic" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected string result }.
    inline for (.{
        .{ "concat('a', 'b')", "ab" },
        .{ "concat('a', 'b', 'c')", "abc" },
        .{ "concat('foo', '-', 'bar', '-', 'baz')", "foo-bar-baz" },
        // Non-string arguments are string-coerced first.
        .{ "concat('x', 1, 'y')", "x1y" },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(tc[1], got.string);
    }
}
|
||||
|
||||
test "Functions: starts-with / contains" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected boolean result }.
    inline for (.{
        .{ "starts-with('hello', 'he')", true },
        .{ "starts-with('hello', 'el')", false },
        // The empty string is a prefix / substring of everything.
        .{ "starts-with('hello', '')", true },
        .{ "contains('hello world', 'wor')", true },
        .{ "contains('hello', 'xyz')", false },
        .{ "contains('hello', '')", true },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .boolean);
        try testing.expectEqual(tc[1], got.boolean);
    }
}
|
||||
|
||||
test "Functions: substring-before / substring-after" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected string result }.
    inline for (.{
        .{ "substring-before('1999/04/01', '/')", "1999" },
        .{ "substring-before('hello', 'xyz')", "" },
        .{ "substring-after('1999/04/01', '/')", "04/01" },
        .{ "substring-after('hello', 'xyz')", "" },
        // An empty needle matches at offset 0, so everything follows it.
        .{ "substring-after('hello', '')", "hello" },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(tc[1], got.string);
    }
}
|
||||
|
||||
test "Functions: substring() — XPath 1-based, rounding, NaN handling" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected string result }.
    inline for (.{
        .{ "substring('12345', 2, 3)", "234" },
        .{ "substring('12345', 2)", "2345" },
        // XPath spec example: round(1.5) = 2 and round(2.6) = 3, so the
        // result starts at position 2 and spans 3 characters.
        .{ "substring('12345', 1.5, 2.6)", "234" },
        // start = 0: si = max(-1, 0) = 0, ei = min(0 - 1 + 3, len) = 2.
        .{ "substring('12345', 0, 3)", "12" },
        // Negative start clamps to 0.
        .{ "substring('12345', -3, 7)", "123" },
        // NaN start.
        .{ "substring('12345', 'foo')", "" },
        // NaN length.
        .{ "substring('12345', 1, 'foo')", "" },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(tc[1], got.string);
    }
}
|
||||
|
||||
test "Functions: string-length on scalar arg" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected length }.
    inline for (.{
        .{ "string-length('hello')", 5 },
        .{ "string-length('')", 0 },
        // Spaces count like any other character.
        .{ "string-length('a b c')", 5 },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .number);
        try testing.expectEqual(@as(f64, tc[1]), got.number);
    }
}
|
||||
|
||||
test "Functions: normalize-space" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected string result }.
    inline for (.{
        .{ "normalize-space(' hello world ')", "hello world" },
        .{ "normalize-space('hello')", "hello" },
        .{ "normalize-space('')", "" },
        // Whitespace-only input collapses to the empty string.
        .{ "normalize-space(' ')", "" },
        // Tabs and newlines are whitespace too.
        .{ "normalize-space('a\tb\nc')", "a b c" },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(tc[1], got.string);
    }
}
|
||||
|
||||
test "Functions: translate" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected string result }.
    inline for (.{
        // Standard XPath spec example.
        .{ "translate('bar', 'abc', 'ABC')", "BAr" },
        // Char in `from` past `to.len` is deleted.
        .{ "translate('--aaa--', 'abc-', 'ABC')", "AAA" },
        // Empty mapping leaves the input untouched.
        .{ "translate('hello', '', '')", "hello" },
        // Identity.
        .{ "translate('abc', 'abc', 'abc')", "abc" },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(tc[1], got.string);
    }
}
|
||||
|
||||
test "Functions: boolean / not / true / false / lang" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected boolean result }.
    inline for (.{
        .{ "true()", true },
        .{ "false()", false },
        .{ "not(true())", false },
        .{ "not(false())", true },
        .{ "boolean(1)", true },
        .{ "boolean(0)", false },
        .{ "boolean('')", false },
        .{ "boolean('x')", true },
        // lang is a stub — always false.
        .{ "lang('en')", false },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .boolean);
        try testing.expectEqual(tc[1], got.boolean);
    }
}
|
||||
|
||||
test "Functions: number() on scalar arg" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Numeric string parses to its value.
    {
        const got = try evalScalar(alloc, "number('42')");
        try testing.expectEqual(@as(f64, 42), got.number);
    }
    // Booleans map to 1 / 0.
    {
        const got = try evalScalar(alloc, "number(true())");
        try testing.expectEqual(@as(f64, 1), got.number);
    }
    {
        const got = try evalScalar(alloc, "number(false())");
        try testing.expectEqual(@as(f64, 0), got.number);
    }
    // Non-numeric string is NaN.
    {
        const got = try evalScalar(alloc, "number('foo')");
        try testing.expect(std.math.isNan(got.number));
    }
}
|
||||
|
||||
test "Functions: floor / ceiling / round" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expression, expected numeric result }.
    inline for (.{
        .{ "floor(1.5)", 1 },
        .{ "floor(-1.5)", -2 },
        .{ "floor(0)", 0 },
        .{ "ceiling(1.5)", 2 },
        .{ "ceiling(-1.5)", -1 },
        .{ "ceiling(0)", 0 },
        // Half-toward-positive-infinity (JS Math.round behavior).
        .{ "round(0.5)", 1 },
        .{ "round(-0.5)", 0 },
        .{ "round(1.5)", 2 },
        .{ "round(-1.5)", -1 },
        .{ "round(2.5)", 3 },
    }) |tc| {
        const got = try evalScalar(alloc, tc[0]);
        try testing.expect(got == .number);
        try testing.expectEqual(@as(f64, tc[1]), got.number);
    }
}
|
||||
|
||||
test "Functions: round/floor/ceiling propagate NaN and Infinity" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // `1 div 0` is +Infinity and must pass through round() unchanged.
    {
        const got = try evalScalar(alloc, "round(1 div 0)");
        try testing.expect(std.math.isPositiveInf(got.number));
    }
    // `0 div 0` is NaN and must pass through all three functions.
    {
        const got = try evalScalar(alloc, "round(0 div 0)");
        try testing.expect(std.math.isNan(got.number));
    }
    {
        const got = try evalScalar(alloc, "floor(0 div 0)");
        try testing.expect(std.math.isNan(got.number));
    }
    {
        const got = try evalScalar(alloc, "ceiling(0 div 0)");
        try testing.expect(std.math.isNan(got.number));
    }
}
|
||||
|
||||
test "Functions: sum / count on non-node-set defaults" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // sum() of a non-node-set is NaN.
    {
        const got = try evalScalar(alloc, "sum('hello')");
        try testing.expect(std.math.isNan(got.number));
    }
    // count() of a non-node-set is 0.
    {
        const got = try evalScalar(alloc, "count('hello')");
        try testing.expectEqual(@as(f64, 0), got.number);
    }
}
|
||||
|
||||
test "Functions: roundHalfToPosInf" {
    // Ties go toward positive infinity. Each entry: { expected, input }.
    inline for (.{
        .{ 1, 0.5 },
        .{ 0, -0.5 },
        .{ 2, 1.5 },
        .{ -1, -1.5 },
        .{ 3, 2.5 },
    }) |tc| {
        try testing.expectEqual(@as(f64, tc[0]), roundHalfToPosInf(tc[1]));
    }

    // Non-finite inputs are returned unchanged.
    const inf = std.math.inf(f64);
    try testing.expect(std.math.isNan(roundHalfToPosInf(std.math.nan(f64))));
    try testing.expect(std.math.isPositiveInf(roundHalfToPosInf(inf)));
    try testing.expect(std.math.isNegativeInf(roundHalfToPosInf(-inf)));
}
|
||||
199
src/browser/xpath/result.zig
Normal file
199
src/browser/xpath/result.zig
Normal file
@@ -0,0 +1,199 @@
|
||||
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
|
||||
//
|
||||
// Francis Bouvier <francis@lightpanda.io>
|
||||
// Pierre Tachoire <pierre@lightpanda.io>
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
//! XPath 1.0 runtime values.
|
||||
//!
|
||||
//! Tagged union over the four XPath value types: node-set, number,
|
||||
//! string, boolean. Type coercion (`toString`, `toNumber`, `toBoolean`)
|
||||
//! follows XPath 1.0 spec §3, with HTML-pragmatic shortcuts (decision
|
||||
//! #2).
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
const Node = @import("../webapi/Node.zig");
|
||||
|
||||
const CData = Node.CData;
|
||||
const Allocator = std.mem.Allocator;
|
||||
|
||||
/// A runtime XPath value: exactly one of the four XPath 1.0 types.
pub const Result = union(enum) {
    /// Owned by the evaluator's arena. Order is significant only at the
    /// public boundary, where the evaluator sorts to document order.
    node_set: []const *Node,
    /// XPath number, stored as a 64-bit float (may be NaN or ±Infinity).
    number: f64,
    /// XPath string as a byte slice.
    string: []const u8,
    /// XPath boolean.
    boolean: bool,
};
|
||||
|
||||
/// String-value of a node (XPath spec §5).
///
/// - Attribute: the attribute value
/// - Text / Comment / CDATA / PI: the node's data
/// - Element / Document: concatenated text descendants (excluding
///   comments and processing-instructions; matches `Node.getTextContent`)
/// - DocumentType / DocumentFragment: empty (matches polyfill's
///   `nodeValue || textContent || ''` fallthrough)
///
/// Attribute and cdata results are borrowed from the node (no
/// allocation); element and document results are built in an
/// arena-allocated concatenation buffer.
pub fn stringValueOf(arena: Allocator, node: *Node) error{WriteFailed}![]const u8 {
    switch (node._type) {
        .attribute => |attr| return attr._value.str(),
        .cdata => |cd| return cd._data.str(),
        .document_type, .document_fragment => return "",
        // Handled below: these need a buffer for the text content.
        .element, .document => {},
    }

    var text = std.Io.Writer.Allocating.init(arena);
    try node.getTextContent(&text.writer);
    return text.written();
}
|
||||
|
||||
/// Boolean coercion: a number is true unless it is zero or NaN; a string
/// or node-set is true unless it is empty.
pub fn toBoolean(val: Result) bool {
    switch (val) {
        .boolean => |flag| return flag,
        .number => |num| return !(num == 0 or std.math.isNan(num)),
        .string => |text| return text.len != 0,
        .node_set => |nodes| return nodes.len != 0,
    }
}
|
||||
|
||||
/// Numeric coercion (XPath spec §4.4).
///
/// Booleans become 1 / 0 and strings go through `stringToNumber`. A
/// node-set is converted via the string-value of its first node; an
/// empty node-set is NaN.
///
/// Empty and whitespace-only strings produce NaN. This deliberately
/// differs from JS, where `Number(' ') === 0`: the polyfill guards with
/// `s.trim() === '' ? NaN : Number(s)`.
pub fn toNumber(arena: Allocator, val: Result) error{WriteFailed}!f64 {
    switch (val) {
        .number => |num| return num,
        .boolean => |flag| return if (flag) 1 else 0,
        .string => |text| return stringToNumber(text),
        .node_set => |nodes| {
            if (nodes.len == 0) return std.math.nan(f64);
            const first_text = try stringValueOf(arena, nodes[0]);
            return stringToNumber(first_text);
        },
    }
}
|
||||
|
||||
/// Converts a string to a number following XPath 1.0 §4.4: optional
/// surrounding whitespace, an optional leading `-`, then either
/// `Digits ('.' Digits?)?` or `'.' Digits`. Every other string,
/// including the empty and whitespace-only ones, is NaN.
///
/// The shape is validated before calling `std.fmt.parseFloat` because
/// the parser alone is more permissive than XPath: it also accepts
/// spellings such as `inf` / `nan`, hex floats, `_` digit separators, a
/// leading `+` and exponents.
pub fn stringToNumber(s: []const u8) f64 {
    const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace);

    var rest = trimmed;
    if (rest.len > 0 and rest[0] == '-') rest = rest[1..];

    // Integer part.
    var pos: usize = 0;
    while (pos < rest.len and std.ascii.isDigit(rest[pos])) pos += 1;
    const int_digits = pos;

    // Optional fractional part.
    var frac_digits: usize = 0;
    if (pos < rest.len and rest[pos] == '.') {
        pos += 1;
        while (pos < rest.len and std.ascii.isDigit(rest[pos])) : (pos += 1) {
            frac_digits += 1;
        }
    }

    // Reject trailing garbage and digit-less inputs ("", "-", ".").
    if (pos != rest.len or int_digits + frac_digits == 0) return std.math.nan(f64);

    return std.fmt.parseFloat(f64, trimmed) catch std.math.nan(f64);
}
|
||||
|
||||
/// String coercion.
///
/// - string: returned as-is (borrowed)
/// - boolean: static "true" / "false"
/// - number: formatted by `numberToString` (allocates)
/// - node-set: string-value of the first node, or "" when empty; this
///   allocates only when that node is an Element/Document (text content
///   concatenation)
pub fn toString(arena: Allocator, val: Result) error{ OutOfMemory, WriteFailed }![]const u8 {
    switch (val) {
        .string => |text| return text,
        .boolean => |flag| return if (flag) "true" else "false",
        .number => |num| return try numberToString(arena, num),
        .node_set => |nodes| {
            if (nodes.len == 0) return "";
            return try stringValueOf(arena, nodes[0]);
        },
    }
}
|
||||
|
||||
/// Formats a number per XPath spec §4.2: NaN, ±0 and ±Infinity have
/// fixed spellings, and integer-valued numbers print without a trailing
/// `.0`. Diverges from Zig's default `{d}`, which prints `nan`/`inf` and
/// may emit `-0`.
///
/// The special spellings are static strings; everything else is
/// allocated from `arena`.
pub fn numberToString(arena: Allocator, n: f64) error{OutOfMemory}![]const u8 {
    if (std.math.isNan(n)) return "NaN";
    if (std.math.isInf(n)) return if (n > 0) "Infinity" else "-Infinity";
    if (n == 0) return "0"; // covers +0 and -0

    // Whole numbers inside this range are printed through an i64 so no
    // fractional part appears.
    const int_limit = 9.007199254740992e15;
    const is_whole = @trunc(n) == n;
    if (!is_whole or n < -int_limit or n > int_limit) {
        return std.fmt.allocPrint(arena, "{d}", .{n});
    }

    const as_int: i64 = @intFromFloat(n);
    return std.fmt.allocPrint(arena, "{d}", .{as_int});
}
|
||||
|
||||
const testing = std.testing;
|
||||
|
||||
test "Result: toBoolean" {
    // Values that must coerce to true.
    const truthy = [_]Result{
        .{ .boolean = true },
        .{ .number = 1 },
        .{ .string = "x" },
    };
    // Values that must coerce to false: zero, NaN and the empty forms.
    const falsy = [_]Result{
        .{ .boolean = false },
        .{ .number = 0 },
        .{ .number = std.math.nan(f64) },
        .{ .string = "" },
        .{ .node_set = &.{} },
    };

    for (truthy) |value| try testing.expect(toBoolean(value));
    for (falsy) |value| try testing.expect(!toBoolean(value));
}
|
||||
|
||||
test "Result: stringToNumber" {
    // Parsable inputs. Each entry: { expected, input }.
    inline for (.{
        .{ 42, "42" },
        .{ 3.14, "3.14" },
        .{ -1, "-1" },
        // Surrounding whitespace is ignored.
        .{ 5, " 5 " },
    }) |tc| {
        try testing.expectEqual(@as(f64, tc[0]), stringToNumber(tc[1]));
    }

    // Empty, blank and non-numeric inputs are NaN.
    inline for (.{ "", " ", "abc" }) |input| {
        try testing.expect(std.math.isNan(stringToNumber(input)));
    }
}
|
||||
|
||||
test "Result: numberToString — integers print without decimal" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Each entry: { expected text, input }. Negative zero prints as "0".
    inline for (.{
        .{ "5", 5 },
        .{ "0", 0 },
        .{ "0", -0.0 },
        .{ "-1", -1 },
        .{ "42", 42.0 },
    }) |tc| {
        try testing.expectEqualStrings(tc[0], try numberToString(alloc, tc[1]));
    }
}
|
||||
|
||||
test "Result: numberToString — special values" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // NaN and the infinities use the XPath spellings, not Zig's.
    const inf = std.math.inf(f64);
    try testing.expectEqualStrings("NaN", try numberToString(alloc, std.math.nan(f64)));
    try testing.expectEqualStrings("Infinity", try numberToString(alloc, inf));
    try testing.expectEqualStrings("-Infinity", try numberToString(alloc, -inf));
}
|
||||
|
||||
test "Result: numberToString — floats" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Non-integers keep their fractional digits.
    const pi_text = try numberToString(alloc, 3.14);
    try testing.expectEqualStrings("3.14", pi_text);
    const half_text = try numberToString(alloc, 0.5);
    try testing.expectEqualStrings("0.5", half_text);
}
|
||||
|
||||
test "Result: toString — boolean returns static string" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    const yes = try toString(arena_state.allocator(), .{ .boolean = true });
    try testing.expectEqualStrings("true", yes);
    const no = try toString(arena_state.allocator(), .{ .boolean = false });
    try testing.expectEqualStrings("false", no);
}
|
||||
|
||||
test "Result: toString — node-set with empty arr is empty" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    // No first node to take a string-value from, so the result is "".
    const text = try toString(arena_state.allocator(), .{ .node_set = &.{} });
    try testing.expectEqualStrings("", text);
}
|
||||
|
||||
test "Result: toNumber — empty node-set is NaN" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    const num = try toNumber(arena_state.allocator(), .{ .node_set = &.{} });
    try testing.expect(std.math.isNan(num));
}
|
||||
|
||||
test "Result: toNumber — boolean coerces to 0/1" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    const one = try toNumber(arena_state.allocator(), .{ .boolean = true });
    try testing.expectEqual(@as(f64, 1), one);
    const zero = try toNumber(arena_state.allocator(), .{ .boolean = false });
    try testing.expectEqual(@as(f64, 0), zero);
}
|
||||
@@ -27,6 +27,7 @@ const dump = @import("../../browser/dump.zig");
|
||||
const js = @import("../../browser/js/js.zig");
|
||||
const DOMNode = @import("../../browser/webapi/Node.zig");
|
||||
const Selector = @import("../../browser/webapi/selector/Selector.zig");
|
||||
const xpath = @import("../../browser/xpath/Evaluator.zig");
|
||||
|
||||
const log = lp.log;
|
||||
const Allocator = std.mem.Allocator;
|
||||
@@ -91,6 +92,56 @@ fn getDocument(cmd: *CDP.Command) !void {
|
||||
return cmd.sendResult(.{ .root = bc.nodeWriter(node, .{ .depth = params.depth }) }, .{});
|
||||
}
|
||||
|
||||
// The 13 named axes of XPath 1.0, as a closed lookup set. `isXPathQuery`
// requires the word directly before `::` to be one of these, so CSS
// pseudo-elements (`a::before`, `div::first-line`) are not misrouted to
// the XPath evaluator just because an identifier-looking word precedes
// the `::`.
const xpath_axis_names = std.StaticStringMap(void).initComptime(.{
    .{ "ancestor", {} },
    .{ "ancestor-or-self", {} },
    .{ "attribute", {} },
    .{ "child", {} },
    .{ "descendant", {} },
    .{ "descendant-or-self", {} },
    .{ "following", {} },
    .{ "following-sibling", {} },
    .{ "namespace", {} },
    .{ "parent", {} },
    .{ "preceding", {} },
    .{ "preceding-sibling", {} },
    .{ "self", {} },
});
|
||||
|
||||
// Heuristic (decision #2/#9) that routes a search query to the XPath
// evaluator instead of the CSS selector engine. A query counts as XPath
// when it
//   - starts with a path operator: `/`, `./`, `(/` or `(./`, or
//   - contains `::` directly preceded by one of the 13 XPath axis names.
// Everything else falls through to CSS.
fn isXPathQuery(q: []const u8) bool {
    if (q.len == 0) return false;

    const path_prefixes = [_][]const u8{ "/", "./", "(/", "(./" };
    for (path_prefixes) |prefix| {
        if (std.mem.startsWith(u8, q, prefix)) return true;
    }

    // For `::` to be an XPath axis separator, the identifier immediately
    // before it must be a named axis. Walk back over the run of
    // [a-zA-Z-] characters and look it up in the closed set.
    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, q, search_from, "::")) |sep| {
        search_from = sep + 1;

        var word_start = sep;
        while (word_start > 0) : (word_start -= 1) {
            const c = q[word_start - 1];
            if (!(std.ascii.isAlphabetic(c) or c == '-')) break;
        }

        // An empty word (including `::` at offset 0) cannot be an axis.
        if (word_start == sep) continue;
        if (xpath_axis_names.has(q[word_start..sep])) return true;
    }
    return false;
}
|
||||
|
||||
// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch
|
||||
fn performSearch(cmd: *CDP.Command) !void {
|
||||
const params = (try cmd.params(struct {
|
||||
@@ -100,15 +151,23 @@ fn performSearch(cmd: *CDP.Command) !void {
|
||||
|
||||
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
|
||||
const frame = bc.session.currentFrame() orelse return error.FrameNotLoaded;
|
||||
const list = try Selector.querySelectorAll(frame.window._document.asNode(), params.query, frame);
|
||||
const root = frame.window._document.asNode();
|
||||
|
||||
if (isXPathQuery(params.query)) {
|
||||
const arena = try frame.getArena(.medium, "DOM.performSearch");
|
||||
defer frame.releaseArena(arena);
|
||||
const nodes = try xpath.searchAll(arena, root, params.query, frame);
|
||||
return finishSearch(cmd, bc, nodes);
|
||||
}
|
||||
|
||||
const list = try Selector.querySelectorAll(root, params.query, frame);
|
||||
defer list.deinit(frame._page);
|
||||
return finishSearch(cmd, bc, list._nodes);
|
||||
}
|
||||
|
||||
const search = try bc.node_search_list.create(list._nodes);
|
||||
|
||||
// dispatch setChildNodesEvents to inform the client of the subpart of node
|
||||
// tree covering the results.
|
||||
try dispatchSetChildNodes(cmd, list._nodes);
|
||||
|
||||
fn finishSearch(cmd: *CDP.Command, bc: *CDP.BrowserContext, nodes: []const *DOMNode) !void {
|
||||
const search = try bc.node_search_list.create(nodes);
|
||||
try dispatchSetChildNodes(cmd, nodes);
|
||||
return cmd.sendResult(.{
|
||||
.searchId = search.name,
|
||||
.resultCount = @as(u32, @intCast(search.node_ids.len)),
|
||||
@@ -616,6 +675,78 @@ test "cdp.dom: search flow" {
|
||||
try ctx.expectSentError(-31998, "SearchResultNotFound", .{ .id = 17 });
|
||||
}
|
||||
|
||||
test "cdp.dom: performSearch with XPath" {
    var ctx = try testing.context();
    defer ctx.deinit();

    _ = try ctx.loadBrowserContext(.{ .id = "BID-A", .url = "cdp/perform_search_xpath.html" });

    // One entry per DOM.performSearch request. The expected search id grows
    // with every request, so the entries must be replayed in this order.
    // The first three queries are XPath-shaped; the last two are plain CSS
    // selectors and exercise the querySelectorAll fallback.
    const cases = .{
        .{ .id = 20, .query = "//p", .search_id = "0", .count = 3 },
        .{ .id = 21, .query = "descendant::p", .search_id = "1", .count = 3 },
        .{ .id = 22, .query = "//*[@id='outer']", .search_id = "2", .count = 1 },
        .{ .id = 23, .query = "p", .search_id = "3", .count = 3 },
        .{ .id = 24, .query = "div p", .search_id = "4", .count = 2 },
    };

    // `inline for` keeps every field comptime-known, so the message and the
    // expectation literals have the same shape as hand-written ones.
    inline for (cases) |case| {
        try ctx.processMessage(.{
            .id = case.id,
            .method = "DOM.performSearch",
            .params = .{ .query = case.query },
        });
        try ctx.expectSentResult(
            .{ .searchId = case.search_id, .resultCount = case.count },
            .{ .id = case.id },
        );
    }
}
|
||||
|
||||
test "cdp.dom: isXPathQuery heuristic" {
    // XPath-shaped queries — each entry covers a distinct heuristic branch.
    const xpath_queries = [_][]const u8{
        "/html",
        "//p",
        ".//foo",
        "(//foo)[1]",
        "(./bar)[2]",
        "descendant::p",
        "ancestor-or-self::*",
        "//*[@id='x']",
    };
    for (xpath_queries) |query| {
        try std.testing.expect(isXPathQuery(query));
    }

    // CSS-shaped queries — these must fall through to the existing path.
    const css_queries = [_][]const u8{
        "",
        "p",
        "div p",
        "#main",
        ".cls",
        "[data-x]",
        "(p)", // parens without path → CSS
        ".x", // leading dot without /

        // CSS pseudo-elements: identifier before `::` is not an XPath axis name.
        "a::before",
        "div::after",
        "p::first-line",
        "input::placeholder",

        // Attribute selector with `::` inside a literal — nothing axis-like before it.
        "[data-x=\"x::y\"]",
    };
    for (css_queries) |query| {
        try std.testing.expect(!isXPathQuery(query));
    }
}
|
||||
|
||||
test "cdp.dom: querySelector unknown search id" {
|
||||
var ctx = try testing.context();
|
||||
defer ctx.deinit();
|
||||
|
||||
Reference in New Issue
Block a user