Merge pull request #2305 from navidemad/feat/xpath-1.0-evaluator

xpath: implement XPath 1.0 (Document.evaluate, XPathResult, DOM.performSearch)
This commit is contained in:
Pierre Tachoire
2026-05-11 10:01:28 +02:00
committed by GitHub
18 changed files with 4847 additions and 7 deletions

View File

@@ -935,6 +935,9 @@ pub const PageJsApis = flattenTypes(&.{
@import("../webapi/CryptoKey.zig"),
@import("../webapi/Selection.zig"),
@import("../webapi/ImageData.zig"),
@import("../webapi/XPathResult.zig"),
@import("../webapi/XPathExpression.zig"),
@import("../webapi/XPathEvaluator.zig"),
});
// APIs available on Worker context globals (constructors like URL, Headers, etc.)

View File

@@ -0,0 +1,8 @@
<!DOCTYPE html>
<body>
<div id=outer>
<p>1</p>
<p>2</p>
</div>
<p>3</p>
</body>

View File

@@ -0,0 +1,123 @@
<!DOCTYPE html>
<body>
<script src="../testing.js"></script>
<h1 id=title>Hello</h1>
<div class=group>
<p id=p1>First</p>
<p id=p2>Second</p>
<p id=p3>Third</p>
</div>
<span id=span1 data-x="42">x</span>
</body>
<script id=snapshot_basic>
{
const r = document.evaluate("//p", document, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(true, r instanceof XPathResult);
testing.expectEqual(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
testing.expectEqual(3, r.snapshotLength);
testing.expectEqual('p1', r.snapshotItem(0).id);
testing.expectEqual('p3', r.snapshotItem(2).id);
testing.expectEqual(null, r.snapshotItem(3));
}
</script>
<script id=default_context>
{
const r1 = document.evaluate("//p", null, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(3, r1.snapshotLength);
const r2 = document.evaluate("//p", undefined, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(3, r2.snapshotLength);
}
</script>
<script id=first_ordered_node>
{
const r = document.evaluate("//p", document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
testing.expectEqual(XPathResult.FIRST_ORDERED_NODE_TYPE, r.resultType);
testing.expectEqual('p1', r.singleNodeValue.id);
const empty = document.evaluate("//nope", document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
testing.expectEqual(null, empty.singleNodeValue);
}
</script>
<script id=number_type>
{
const r = document.evaluate("count(//p)", document, null,
XPathResult.NUMBER_TYPE, null);
testing.expectEqual(XPathResult.NUMBER_TYPE, r.resultType);
testing.expectEqual(3, r.numberValue);
}
</script>
<script id=string_type>
{
const r = document.evaluate("string(//h1)", document, null,
XPathResult.STRING_TYPE, null);
testing.expectEqual(XPathResult.STRING_TYPE, r.resultType);
testing.expectEqual('Hello', r.stringValue);
}
</script>
<script id=boolean_type>
{
const r = document.evaluate("count(//p) > 0", document, null,
XPathResult.BOOLEAN_TYPE, null);
testing.expectEqual(XPathResult.BOOLEAN_TYPE, r.resultType);
testing.expectEqual(true, r.booleanValue);
}
</script>
<script id=any_type>
{
const ns = document.evaluate("//p", document, null,
XPathResult.ANY_TYPE, null);
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, ns.resultType);
testing.expectEqual('p1', ns.iterateNext().id);
const num = document.evaluate("count(//p)", document, null,
XPathResult.ANY_TYPE, null);
testing.expectEqual(XPathResult.NUMBER_TYPE, num.resultType);
testing.expectEqual(3, num.numberValue);
}
</script>
<script id=context_node_scoping>
{
const div = document.querySelector('div.group');
const r = document.evaluate("./p", div, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(3, r.snapshotLength);
testing.expectEqual('p1', r.snapshotItem(0).id);
}
</script>
<script id=create_expression>
{
const expr = document.createExpression("//p", null);
testing.expectEqual(true, expr instanceof XPathExpression);
const r = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(3, r.snapshotLength);
}
</script>
<script id=create_ns_resolver>
{
const resolver = document.createNSResolver(document);
testing.expectEqual(document, resolver);
}
</script>
<script id=attribute_axis>
{
const r = document.evaluate("//span/@data-x", document, null,
XPathResult.STRING_TYPE, null);
testing.expectEqual('42', r.stringValue);
}
</script>

View File

@@ -0,0 +1,201 @@
<!DOCTYPE html>
<html>
<head>
<title>XPath conformance</title>
<script src="../testing.js"></script>
</head>
<body>
<h1 id="heading" class="primary">Hello World</h1>
<p id="p1" lang="en" data-x="1">First paragraph with <em>emphasis</em>.</p>
<p id="p2" class="note">Second paragraph.</p>
<ul id="list">
<li class="item odd">Item 1</li>
<li class="item even">Item 2</li>
<li class="item odd">Item 3</li>
<li class="item even">Item 4</li>
<li class="item odd">Item 5</li>
</ul>
<table id="t">
<thead><tr><th>Name</th><th>Age</th></tr></thead>
<tbody>
<tr class="r"><td>Alice</td><td>30</td></tr>
<tr class="r"><td>Bob</td><td>25</td></tr>
<tr class="r"><td>Carol</td><td>40</td></tr>
</tbody>
</table>
<div id="container">
<section id="s1"><span>A</span><span>B</span></section>
<section id="s2"><a href="/foo" id="link1">Click me</a></section>
<section id="s3"><a href="/bar" id="link2">Other link</a></section>
</div>
<form id="form">
<label for="name">Name</label>
<input id="name" type="text" name="name" value="">
<input id="email" type="email" name="email" value="">
<input id="hidden" type="hidden" name="csrf" value="x">
<input id="checkbox" type="checkbox" name="agree">
<button id="btn" type="submit">Submit</button>
</form>
<!-- a comment node -->
<div id="multi-class" class="alpha beta gamma"></div>
<article id="art">
<p>One</p>
<p>Two</p>
<p>Three</p>
</article>
<script id=conformance_battery>
{
// XPath 1.0 conformance battery. Result counts derive from the body
// fixture above; keep the two in sync.
const cases = [
// Absolute paths
["/html", 1, "absolute root child"],
["/html/body", 1, "/html/body absolute"],
["/", 1, "root only"],
// Descendant abbreviations
["//h1", 1, "// descendant"],
["//ul/li", 5, "/ child"],
["//ul//li", 5, "// nested descendant"],
[".", 1, "self ."],
[".//li", 5, "context-rel descendant"],
// Wildcards
["//section/*", 4, "//section/* (2 spans + 2 anchors)"],
["//*[@id='heading']", 1, "//*[@id]"],
// Axes
["//li[1]/following-sibling::li", 4, "following-sibling"],
["//li[5]/preceding-sibling::li", 4, "preceding-sibling"],
["//li/parent::ul", 1, "parent::"],
["//li/ancestor::body", 1, "ancestor::body"],
["//li/ancestor-or-self::body", 1, "ancestor-or-self::"],
["//li[3]/preceding::li", 2, "preceding axis"],
["//li[1]/following::li", 4, "following axis"],
["//ul/descendant::li", 5, "descendant axis"],
["//ul/descendant-or-self::li", 5, "descendant-or-self::li"],
["//section[1]/child::span", 2, "child:: explicit"],
["//*[@id='heading']/self::h1", 1, "self:: type guard"],
// Attribute axis
["//a[1]/attribute::href", 2, "attribute::href"],
["//a[1]/@*", 4, "@* (2 anchors x 2 attrs)"],
// Position predicates
["//li[1]", 1, "[1]"],
["//li[last()]", 1, "[last()]"],
["//li[last() - 1]", 1, "[last() - 1]"],
["//li[position() = 1]", 1, "explicit position()"],
["//li[position() > 2]", 3, "position > 2"],
["//li[position() mod 2 = 1]", 3, "position mod 2 = 1 (odd)"],
["(//li)[1]", 1, "(//li)[1] filter on group"],
["(//section)[2]", 1, "(//section)[2] grouped"],
// Reverse-axis proximity-order predicates
["//li[3]/preceding-sibling::li[1]", 1, "reverse axis [1] = nearest"],
["//li[5]/ancestor::*[1]", 1, "ancestor::*[1] = parent ul"],
// Multi-predicate / chained
["//li[contains(concat(' ', @class, ' '), ' even ')][2]", 1, "filter then position [2]"],
["//*[@id='heading' and @class='primary']", 1, "and"],
["//*[@id='heading' or @id='p1']", 2, "or"],
// Sub-path predicates
["//section[a]", 2, "section with a child"],
["//section[count(span) = 2]", 1, "count() in predicate"],
["//ul[count(li) = 5]", 1, "count() = 5"],
["//tr[td[1]]", 3, "tr with first td (sub-step)"],
["//tr[td/text() = 'Bob']", 1, "deep sub-path equality"],
// String functions
["//*[starts-with(@id, 'link')]", 2, "starts-with"],
["//*[normalize-space() = 'Hello World']", 1, "normalize-space() default arg"],
["//*[normalize-space(.) = 'Item 1']", 1, "normalize-space(arg)"],
["//*[concat(@id, '-x') = 'heading-x']", 1, "concat"],
["//*[substring(@id, 1, 1) = 'p']", 2, "substring (3 args)"],
["//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]", 1, "substring constrained"],
["//p[translate(@id, 'p', 'q') = 'q1']", 1, "translate"],
["//*[substring-before(@id, '1') = 'p']", 1, "substring-before"],
["//*[substring-after(@id, 'lin') = 'k1']", 1, "substring-after"],
// Number functions
["//tr[number(td[2]) > 28]", 2, "number() in compare"],
["//tr[floor(number(td[2]) div 10) = 3]", 1, "floor + div"],
["//tr[ceiling(number(td[2]) div 10) = 3]", 2, "ceiling + div"],
["//tr[round(number(td[2]) div 10) = 3]", 2, "round half-up"],
["//ul[sum(li/@data-len) = 0]", 1, "sum() over empty path -> 0"],
// Boolean functions
["//p[boolean(@lang)]", 1, "boolean()"],
["//*[false()]", 0, "false() always-false"],
// name() / local-name() — lowercased per decision #2
["//*[name() = 'h1']", 1, "name() of context"],
["//*[local-name() = 'h1']", 1, "local-name() of context"],
// id()
["id('heading')", 1, "id()"],
["id('heading p1')", 2, "id() multi-token"],
["id(//em/parent::p/@id)", 1, "id() on attribute string-value"],
// Union
["//h1 | //title", 2, "union (h1 + title)"],
["//h1 | //*[@id='p1']", 2, "union of 2 different selectors"],
["//*[@id='heading'] | //*[@id='heading']", 1, "self-union dedups"],
// Arithmetic
["//li[position() + 1 = 3]", 1, "+"],
["//li[position() - 1 = 0]", 1, "-"],
["//li[position() * 2 = 4]", 1, "* multiply"],
["//li[position() div 2 = 1]", 1, "div"],
["//li[(position() mod 2) = 0]", 2, "mod"],
// Comparison — header tr's td[2] = 'Age' coerces to NaN, so
// != against any number is true (NaN equals nothing).
["//tr[number(td[2]) = 30]", 1, "= numeric"],
["//tr[number(td[2]) != 30]", 3, "!= numeric (header NaN passes)"],
["//tr[number(td[2]) < 30]", 1, "< numeric"],
["//tr[number(td[2]) <= 30]", 2, "<= numeric"],
["//tr[number(td[2]) > 30]", 1, "> numeric"],
["//tr[number(td[2]) >= 30]", 2, ">= numeric"],
["//tr[td[2] = 30]", 1, "string-vs-number coercion"],
["//tr[td[2] = '30']", 1, "string-vs-string equality"],
// Node tests
["//comment()", 1, "comment() node test"],
// Capybara-style real-world expressions
[".//a[contains(normalize-space(string(.)), 'Click me')]", 1, "Capybara link locator"],
[".//input[(./@type = 'text')]", 1, "Capybara text-field"],
[".//*[@id='heading']", 1, "find-by-id"],
[".//li[contains(concat(' ', @class, ' '), ' even ')]", 2, "class contains pattern"],
// Tricky / edge
["//*[@id='heading']/text()", 1, "text() child of element"],
["//em/parent::p", 1, "parent of inline"],
["//p[em]", 1, "p with em descendant"],
["//p[not(em)]", 4, "p without em"],
["//section[a/@href = '/foo']", 1, "deep attribute eq"],
["//ul/li[last()][position() = last()]", 1, "double last()"],
["//ul[string(count(li)) = '5']", 1, "string() of number"],
["//body[count(//*[contains(@class, 'item')]) = 5]", 1, "nested count of contains()"],
];
// Run every case as an ordered snapshot and compare the node count with the
// expected value from the table above.
for (const [xp, expected, desc] of cases) {
  let r;
  try {
    r = document.evaluate(xp, document, null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
  } catch (e) {
    testing.fail(`[${desc}] ${xp} → threw: ${(e && e.message) || e}`);
    // `r` was never assigned. Without this, the length check below would
    // dereference undefined and raise a TypeError that hides the real
    // failure and stops the remaining cases from running. Harmless if
    // testing.fail() itself throws.
    continue;
  }
  if (r.snapshotLength !== expected) {
    testing.fail(`[${desc}] ${xp} → got ${r.snapshotLength}, expected ${expected}`);
  }
}
testing.expectEqual(91, cases.length);
}
</script>
</body>
</html>

View File

@@ -0,0 +1,103 @@
<!DOCTYPE html>
<body>
<script src="../testing.js"></script>
<h1 id=h>Hello</h1>
<p id=p1>One</p>
<p id=p2>Two</p>
</body>
<script id=create_expression>
{
const expr = document.createExpression("//p", null);
testing.expectEqual(true, expr instanceof XPathExpression);
testing.expectEqual('function', typeof expr.evaluate);
}
</script>
<script id=expression_evaluate>
{
const expr = document.createExpression("//p", null);
const r = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(true, r instanceof XPathResult);
testing.expectEqual(2, r.snapshotLength);
}
</script>
<script id=expression_reuse_distinct_types>
{
// The cached AST should support multiple evaluations against
// different requested types.
const expr = document.createExpression("//p", null);
const snap = expr.evaluate(document, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(2, snap.snapshotLength);
const iter = expr.evaluate(document, XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
testing.expectEqual('p1', iter.iterateNext().id);
testing.expectEqual('p2', iter.iterateNext().id);
const first = expr.evaluate(document, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
testing.expectEqual('p1', first.singleNodeValue.id);
}
</script>
<script id=expression_reuse_distinct_contexts>
{
// Re-evaluating with a different context node should rescope.
const expr = document.createExpression("p", null);
const all = expr.evaluate(document.body, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(2, all.snapshotLength);
const empty = expr.evaluate(document.querySelector('h1'),
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(0, empty.snapshotLength);
}
</script>
<script id=create_ns_resolver>
{
const resolver = document.createNSResolver(document);
testing.expectEqual(document, resolver);
const elt = document.createNSResolver(document.body);
testing.expectEqual(document.body, elt);
}
</script>
<script id=new_xpath_evaluator>
{
const ev = new XPathEvaluator();
testing.expectEqual(true, ev instanceof XPathEvaluator);
testing.expectEqual('function', typeof ev.evaluate);
testing.expectEqual('function', typeof ev.createExpression);
testing.expectEqual('function', typeof ev.createNSResolver);
}
</script>
<script id=evaluator_evaluate>
{
const ev = new XPathEvaluator();
const r = ev.evaluate("//p", document, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(true, r instanceof XPathResult);
testing.expectEqual(2, r.snapshotLength);
testing.expectEqual('p1', r.snapshotItem(0).id);
}
</script>
<script id=evaluator_create_expression>
{
const ev = new XPathEvaluator();
const expr = ev.createExpression("count(//p)", null);
testing.expectEqual(true, expr instanceof XPathExpression);
const r = expr.evaluate(document, XPathResult.NUMBER_TYPE, null);
testing.expectEqual(2, r.numberValue);
}
</script>
<script id=evaluator_create_ns_resolver>
{
const ev = new XPathEvaluator();
testing.expectEqual(document, ev.createNSResolver(document));
}
</script>

View File

@@ -0,0 +1,171 @@
<!DOCTYPE html>
<html>
<head>
<title>XPath perf benchmark</title>
<script src="../testing.js"></script>
</head>
<body>
<!--
Micro-benchmark for the XPath evaluator.
Builds a deterministic DOM (TREE_SIZE = 500), then runs each query
a few warmup iterations followed by ITERATIONS timed iterations.
Reports mean µs/iter via console.warn (the test harness sets
log level to .warn and silently drops info-level lines, so
console.log doesn't surface; console.warn does). A snapshotLength
mismatch fails the test loudly via testing.fail so a regression in
result count can't be hidden by the timing line.
To run, uncomment the test in XPathResult.zig (bottom of the file), then:
Run: make test F="#xpath_perf"
Query shapes target the optimization roadmap:
//*[@id='x'] — global ID lookup (fast-path candidate)
//tag[@id='x'] — typed ID lookup (fast-path candidate)
//tag — pure descendant tag scan
//*[@class='x'] — non-ID attribute filter (no fast path)
(//tag)[1] / [last()] — early-exit candidates (iterator opt)
count(//tag) — early-exit candidate (iterator opt)
Plus a few that should NOT change so we can detect regressions.
Tuning: keep TREE_SIZE × ITERATIONS small enough that the test
finishes in <1s on debug builds. Adjust ITERATIONS up if numbers
are noisy.
-->
<script id=xpath_perf_setup>
{
const TREE_SIZE = 500;
const TAGS = ["div", "span", "p"];
const CLASSES = ["alpha", "beta", "gamma"];
const TARGET_INDEX = 250;
// Expose the construction parameters for the run script.
window.__perf = { TREE_SIZE, TAGS, CLASSES, TARGET_INDEX };
// Decorrelate tag (period 3) and class (period 4) so that
// //div[@class='alpha'] is not a degenerate restatement of //div.
// Tag name for the i-th generated element: cycles through TAGS in order.
function tagOf(i) {
  const slot = i % TAGS.length;
  return TAGS[slot];
}
// Class name for the i-th generated element. The index is first folded with
// period 4 and only then mapped onto CLASSES, so the class sequence does not
// line up with the tag sequence.
function classOf(i) {
  const phase = i % 4;
  return CLASSES[phase % CLASSES.length];
}
// Pre-compute expected counts so the assertions don't have to
// re-derive the formula. Stored on window.__perf for the run script.
const tags = Object.fromEntries(TAGS.map(t => [t, 0]));
const classes = Object.fromEntries(CLASSES.map(c => [c, 0]));
const cross = {};
for (let i = 0; i < TREE_SIZE; i++) {
const t = tagOf(i), c = classOf(i);
tags[t]++;
classes[c]++;
const k = `${t}-${c}`;
cross[k] = (cross[k] || 0) + 1;
}
window.__perf.tags = tags;
window.__perf.classes = classes;
window.__perf.cross = cross;
window.__perf.targetTag = tagOf(TARGET_INDEX);
// Build the fixture body via innerHTML in one shot. Faster than
// createElement loops because html5ever parses the whole string at
// once and we don't pay per-element bridge crossings.
const parts = [];
for (let i = 0; i < TREE_SIZE; i++) {
const id = (i === TARGET_INDEX) ? "target" : `n${i}`;
parts.push(`<${tagOf(i)} id="${id}" class="${classOf(i)}">item ${i}</${tagOf(i)}>`);
}
// Use <main> as the wrapper so //div, //span, //p count only the
// generated children (the wrapper itself doesn't share a tag with
// any test query).
const root = document.createElement("main");
root.id = "perf_root";
root.innerHTML = parts.join("");
document.body.appendChild(root);
testing.expectEqual(TREE_SIZE, root.children.length);
}
</script>
<script id=xpath_perf_run>
{
const ITERATIONS = 50;
const WARMUP = 3;
const { TREE_SIZE, tags, classes, cross, targetTag } = window.__perf;
const cases = [
// --- ID lookups (fast-path #1 candidates) ---
{ xp: "//*[@id='target']", expect: 1, label: "id-any" },
{ xp: `//${targetTag}[@id='target']`, expect: 1, label: "id-typed-hit" },
{ xp: "//div[@id='target']", expect: targetTag === "div" ? 1 : 0, label: "id-typed-miss" },
// --- Pure tag descendant ---
{ xp: "//div", expect: tags.div, label: "tag-descendant" },
{ xp: "//span", expect: tags.span, label: "tag-descendant-span" },
{ xp: "//*", expect: null, label: "universal-descendant" },
// --- Attribute equality (no fast path planned) ---
{ xp: "//*[@class='alpha']", expect: classes.alpha, label: "class-eq-any" },
{ xp: "//div[@class='alpha']", expect: cross["div-alpha"] || 0, label: "class-eq-div" },
// --- Early-exit candidates (iterator optimization) ---
{ xp: "(//div)[1]", expect: 1, label: "first-of-many" },
{ xp: "(//div)[last()]", expect: 1, label: "last-of-many" },
// --- Functions in predicate (regression guard) ---
{ xp: "//div[contains(@class,'alpha')]", expect: cross["div-alpha"] || 0, label: "contains-class" },
{ xp: "//div[starts-with(@id,'n')]", expect: tags.div - (targetTag === "div" ? 1 : 0), label: "starts-with-id" },
// --- Counting (number result, iterator early-exit candidate) ---
{ xp: "count(//div)", expect: tags.div, label: "count" },
];
// Left-aligns `s` (stringified) in a column of width `w` by appending spaces;
// values already at least `w` characters long are returned unchanged.
function pad(s, w) {
  return String(s).padEnd(w);
}
// Evaluates `xp` against the document with ANY_TYPE and reduces the result to
// one number: the numeric value for number results, or the number of nodes
// yielded by iterateNext() for node iterators. Any other result type gives
// null.
function evalAndCount(xp) {
  const result = document.evaluate(xp, document, null, XPathResult.ANY_TYPE, null);
  switch (result.resultType) {
    case XPathResult.NUMBER_TYPE:
      return result.numberValue;
    case XPathResult.UNORDERED_NODE_ITERATOR_TYPE:
    case XPathResult.ORDERED_NODE_ITERATOR_TYPE: {
      let total = 0;
      for (let node = result.iterateNext(); node; node = result.iterateNext()) {
        total += 1;
      }
      return total;
    }
    default:
      return null;
  }
}
// Accumulate output into one big string. Per-call console.warn lines
// get clobbered by the test harness's interleaved prints; batching
// into a single warn call survives the interleaving intact.
const out = [];
out.push(`tree_size=${TREE_SIZE} iterations=${ITERATIONS} warmup=${WARMUP}`);
out.push(`${pad("label", 24)} ${pad("count", 8)} ${pad("µs/iter", 10)} xpath`);
for (const c of cases) {
let actual = null;
for (let i = 0; i < WARMUP; i++) {
actual = evalAndCount(c.xp);
}
if (c.expect !== null && actual !== c.expect) {
testing.fail(`[xpath-perf] ${c.label} returned ${actual}, expected ${c.expect}`);
}
// Time the full document.evaluate call. ANY_TYPE so we don't pay
// snapshot allocation we wouldn't pay in the realistic
// DOM.performSearch path either.
const t0 = performance.now();
for (let i = 0; i < ITERATIONS; i++) {
document.evaluate(c.xp, document, null, XPathResult.ANY_TYPE, null);
}
const t1 = performance.now();
const us_per_iter = ((t1 - t0) * 1000) / ITERATIONS;
out.push(`${pad(c.label, 24)} ${pad(actual ?? "-", 8)} ${pad(us_per_iter.toFixed(1), 10)} ${c.xp}`);
}
console.warn("[xpath-perf]\n" + out.map(l => "[xpath-perf] " + l).join("\n"));
testing.expectEqual(true, true);
}
</script>
</body>
</html>

View File

@@ -0,0 +1,193 @@
<!DOCTYPE html>
<body>
<script src="../testing.js"></script>
<h1 id=h>Hello</h1>
<p id=p1>One</p>
<p id=p2>Two</p>
</body>
<script id=type_constants>
{
testing.expectEqual(0, XPathResult.ANY_TYPE);
testing.expectEqual(1, XPathResult.NUMBER_TYPE);
testing.expectEqual(2, XPathResult.STRING_TYPE);
testing.expectEqual(3, XPathResult.BOOLEAN_TYPE);
testing.expectEqual(4, XPathResult.UNORDERED_NODE_ITERATOR_TYPE);
testing.expectEqual(5, XPathResult.ORDERED_NODE_ITERATOR_TYPE);
testing.expectEqual(6, XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE);
testing.expectEqual(7, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE);
testing.expectEqual(8, XPathResult.ANY_UNORDERED_NODE_TYPE);
testing.expectEqual(9, XPathResult.FIRST_ORDERED_NODE_TYPE);
}
</script>
<script id=instance_constants>
{
// Type constants are also exposed on instances.
const r = document.evaluate("//p", document, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(7, r.ORDERED_NODE_SNAPSHOT_TYPE);
testing.expectEqual(0, r.ANY_TYPE);
}
</script>
<script id=number_value>
{
const r = document.evaluate("count(//p)", document, null,
XPathResult.NUMBER_TYPE, null);
testing.expectEqual(XPathResult.NUMBER_TYPE, r.resultType);
testing.expectEqual(2, r.numberValue);
}
</script>
<script id=string_value>
{
const r = document.evaluate("string(//h1)", document, null,
XPathResult.STRING_TYPE, null);
testing.expectEqual(XPathResult.STRING_TYPE, r.resultType);
testing.expectEqual('Hello', r.stringValue);
}
</script>
<script id=boolean_value>
{
const r = document.evaluate("count(//p) > 0", document, null,
XPathResult.BOOLEAN_TYPE, null);
testing.expectEqual(XPathResult.BOOLEAN_TYPE, r.resultType);
testing.expectEqual(true, r.booleanValue);
}
</script>
<script id=unordered_iterator>
{
const r = document.evaluate("//p", document, null,
XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, r.resultType);
testing.expectEqual('p1', r.iterateNext().id);
testing.expectEqual('p2', r.iterateNext().id);
testing.expectEqual(null, r.iterateNext());
testing.expectEqual(false, r.invalidIteratorState);
}
</script>
<script id=ordered_iterator>
{
const r = document.evaluate("//p", document, null,
XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
testing.expectEqual(XPathResult.ORDERED_NODE_ITERATOR_TYPE, r.resultType);
testing.expectEqual('p1', r.iterateNext().id);
testing.expectEqual('p2', r.iterateNext().id);
testing.expectEqual(null, r.iterateNext());
}
</script>
<script id=unordered_snapshot>
{
const r = document.evaluate("//p", document, null,
XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
testing.expectEqual(2, r.snapshotLength);
testing.expectEqual('p1', r.snapshotItem(0).id);
testing.expectEqual('p2', r.snapshotItem(1).id);
testing.expectEqual(null, r.snapshotItem(2));
}
</script>
<script id=ordered_snapshot>
{
const r = document.evaluate("//p", document, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
testing.expectEqual(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, r.resultType);
testing.expectEqual(2, r.snapshotLength);
testing.expectEqual('p1', r.snapshotItem(0).id);
testing.expectEqual('p2', r.snapshotItem(1).id);
}
</script>
<script id=any_unordered_node>
{
const r = document.evaluate("//p", document, null,
XPathResult.ANY_UNORDERED_NODE_TYPE, null);
testing.expectEqual(XPathResult.ANY_UNORDERED_NODE_TYPE, r.resultType);
testing.expectEqual('p1', r.singleNodeValue.id);
const empty = document.evaluate("//nope", document, null,
XPathResult.ANY_UNORDERED_NODE_TYPE, null);
testing.expectEqual(null, empty.singleNodeValue);
}
</script>
<script id=first_ordered_node>
{
const r = document.evaluate("//p", document, null,
XPathResult.FIRST_ORDERED_NODE_TYPE, null);
testing.expectEqual(XPathResult.FIRST_ORDERED_NODE_TYPE, r.resultType);
testing.expectEqual('p1', r.singleNodeValue.id);
}
</script>
<script id=any_type_resolution>
{
// ANY_TYPE settles into the natural category of the result.
const ns = document.evaluate("//p", document, null,
XPathResult.ANY_TYPE, null);
testing.expectEqual(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, ns.resultType);
const num = document.evaluate("count(//p)", document, null,
XPathResult.ANY_TYPE, null);
testing.expectEqual(XPathResult.NUMBER_TYPE, num.resultType);
testing.expectEqual(2, num.numberValue);
const str = document.evaluate("string(//h1)", document, null,
XPathResult.ANY_TYPE, null);
testing.expectEqual(XPathResult.STRING_TYPE, str.resultType);
testing.expectEqual('Hello', str.stringValue);
const bool = document.evaluate("true()", document, null,
XPathResult.ANY_TYPE, null);
testing.expectEqual(XPathResult.BOOLEAN_TYPE, bool.resultType);
testing.expectEqual(true, bool.booleanValue);
}
</script>
<script id=type_mismatch_throws>
{
const r = document.evaluate("//p", document, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
// Wrong-typed accessors throw DOMException(InvalidStateError).
let n = 0;
try { r.numberValue; } catch (e) { n++; }
try { r.stringValue; } catch (e) { n++; }
try { r.booleanValue; } catch (e) { n++; }
try { r.singleNodeValue; } catch (e) { n++; }
try { r.iterateNext(); } catch (e) { n++; }
testing.expectEqual(5, n);
}
</script>
<script id=node_set_for_scalar_throws>
{
// Requesting a node-set type for a scalar expression rejects.
let threw = false;
try {
document.evaluate("count(//p)", document, null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
} catch (e) {
threw = true;
}
testing.expectEqual(true, threw);
}
</script>
<script id=iterator_exhaustion>
{
const r = document.evaluate("//p", document, null,
XPathResult.ORDERED_NODE_ITERATOR_TYPE, null);
testing.expectEqual('p1', r.iterateNext().id);
testing.expectEqual('p2', r.iterateNext().id);
testing.expectEqual(null, r.iterateNext());
// Re-calling on an exhausted iterator stays at null.
testing.expectEqual(null, r.iterateNext());
}
</script>

View File

@@ -35,6 +35,8 @@ const DOMImplementation = @import("DOMImplementation.zig");
const StyleSheetList = @import("css/StyleSheetList.zig");
const FontFaceSet = @import("css/FontFaceSet.zig");
const Selection = @import("Selection.zig");
const XPathResult = @import("XPathResult.zig");
const XPathExpression = @import("XPathExpression.zig");
pub const XMLDocument = @import("XMLDocument.zig");
pub const HTMLDocument = @import("HTMLDocument.zig");
@@ -412,6 +414,44 @@ pub fn createNodeIterator(_: *const Document, root: *Node, what_to_show: ?js.Val
return DOMNodeIterator.init(root, try whatToShow(what_to_show), filter, frame);
}
/// `document.evaluate()`: evaluates the XPath `expression` and returns the
/// resulting `XPathResult`.
///
/// `resolver` and `result` are accepted but are no-ops in HTML mode
/// (decision #2). A null `context_node` falls back to this document's own
/// node, and a null `result_type` is treated as `XPathResult.ANY_TYPE`.
pub fn evaluate(
    self: *Document,
    expression: []const u8,
    context_node: ?*Node,
    resolver: ?js.Function,
    result_type: ?u16,
    result: ?*XPathResult,
    frame: *Frame,
) !*XPathResult {
    _ = resolver;
    _ = result;

    // Firefox throws TypeError when the context argument is *missing*, but
    // the bridge can't distinguish "missing" from an explicit null here, so
    // both resolve to the document, matching the polyfill (decision #2).
    const ctx = context_node orelse self.asNode();
    const requested = result_type orelse XPathResult.ANY_TYPE;
    return XPathResult.fromExpression(expression, ctx, requested, frame);
}
/// `document.createExpression()`: builds an `XPathExpression` from
/// `expression` via `XPathExpression.init`. The `resolver` argument is
/// accepted but ignored.
pub fn createExpression(
_: *const Document,
expression: []const u8,
resolver: ?js.Function,
frame: *Frame,
) !*XPathExpression {
_ = resolver;
return XPathExpression.init(expression, frame);
}
/// `document.createNSResolver()`: passthrough that returns the node it was
/// given; no separate resolver object is created.
pub fn createNSResolver(_: *const Document, node: *Node) ?*Node {
return node;
}
fn whatToShow(value_: ?js.Value) !u32 {
const value = value_ orelse return 4294967295; // show all when undefined
if (value.isUndefined()) {
@@ -1053,6 +1093,9 @@ pub const JsApi = struct {
pub const createEvent = bridge.function(Document.createEvent, .{ .dom_exception = true });
pub const createTreeWalker = bridge.function(Document.createTreeWalker, .{});
pub const createNodeIterator = bridge.function(Document.createNodeIterator, .{});
pub const evaluate = bridge.function(Document.evaluate, .{ .dom_exception = true });
pub const createExpression = bridge.function(Document.createExpression, .{ .dom_exception = true });
pub const createNSResolver = bridge.function(Document.createNSResolver, .{});
pub const getElementById = bridge.function(_getElementById, .{});
fn _getElementById(self: *Document, value_: ?js.Value, frame: *Frame) !?*Element {
const value = value_ orelse return null;
@@ -1113,3 +1156,7 @@ const testing = @import("../../testing.zig");
test "WebApi: Document" {
try testing.htmlRunner("document", .{});
}
// Runs the HTML fixture that exercises document.evaluate(),
// createExpression() and createNSResolver() from script.
test "WebApi: Document.evaluate" {
try testing.htmlRunner("xpath/document_evaluate.html", .{});
}

View File

@@ -0,0 +1,97 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! WHATWG `XPathEvaluator` — a stateless factory for XPath evaluation.
//! Mirrors `Document.evaluate` / `Document.createExpression` /
//! `Document.createNSResolver` so an explicit
//! `new XPathEvaluator()` instance can be used in place of the
//! document.
const std = @import("std");
const js = @import("../js/js.zig");
const Frame = @import("../Frame.zig");
const Node = @import("Node.zig");
const XPathResult = @import("XPathResult.zig");
const XPathExpression = @import("XPathExpression.zig");
const XPathEvaluator = @This();
// Padding to avoid zero-size struct identity_map collisions (matches
// the convention in ResizeObserver.zig).
_pad: bool = false,
/// Backs `new XPathEvaluator()`: returns a default-initialized instance.
/// The evaluator keeps no state beyond its padding field.
pub fn init() XPathEvaluator {
return .{};
}
/// `XPathEvaluator.evaluate()`: evaluates the XPath `expression` against
/// `context_node` and returns the resulting `XPathResult`.
///
/// The namespace `resolver` is accepted and ignored (HTML mode, decision
/// #2). `result` reuse is likewise a no-op, since
/// `XPathResult.fromExpression` always allocates a fresh instance. A null
/// `requested_type` is treated as `XPathResult.ANY_TYPE`.
pub fn evaluate(
    _: *const XPathEvaluator,
    expression: []const u8,
    context_node: *Node,
    resolver: ?js.Function,
    requested_type: ?u16,
    result: ?*XPathResult,
    frame: *Frame,
) !*XPathResult {
    _ = resolver;
    _ = result;

    const wanted = requested_type orelse XPathResult.ANY_TYPE;
    return XPathResult.fromExpression(expression, context_node, wanted, frame);
}
/// `XPathEvaluator.createExpression()`: builds an `XPathExpression` from
/// `expression` via `XPathExpression.init`. The `resolver` argument is
/// accepted but ignored.
pub fn createExpression(
_: *const XPathEvaluator,
expression: []const u8,
resolver: ?js.Function,
frame: *Frame,
) !*XPathExpression {
_ = resolver;
return XPathExpression.init(expression, frame);
}
/// `XPathEvaluator.createNSResolver()`: returns the node it was given
/// unchanged.
pub fn createNSResolver(_: *const XPathEvaluator, node: *Node) ?*Node {
// HTML-mode passthrough — the WHATWG IDL accepts a Node and returns
// an `XPathNSResolver`, but in practice the input node is reused.
return node;
}
// JS bridge declarations for `XPathEvaluator`: the class metadata, the
// constructor, and the three methods exposed to scripts.
pub const JsApi = struct {
pub const bridge = js.Bridge(XPathEvaluator);
pub const Meta = struct {
// Name of the class as seen from JS.
pub const name = "XPathEvaluator";
pub const prototype_chain = bridge.prototypeChain();
pub var class_id: bridge.ClassId = undefined;
// NOTE(review): presumably tells the bridge this type carries no real
// state and has no parent prototype — confirm against js.Bridge.
pub const empty_with_no_proto = true;
};
// `new XPathEvaluator()` maps to `XPathEvaluator.init`.
pub const constructor = bridge.constructor(XPathEvaluator.init, .{});
// `.dom_exception = true` is set on the two methods that return an error
// union; `createNSResolver` cannot fail and omits it.
pub const evaluate = bridge.function(XPathEvaluator.evaluate, .{ .dom_exception = true });
pub const createExpression = bridge.function(XPathEvaluator.createExpression, .{ .dom_exception = true });
pub const createNSResolver = bridge.function(XPathEvaluator.createNSResolver, .{});
};
const testing = @import("../../testing.zig");
// Runs the `xpath/xpath_evaluator.html` fixture through the shared HTML test
// runner; per the test name it covers XPathEvaluator and XPathExpression.
test "WebApi: XPathEvaluator + XPathExpression" {
try testing.htmlRunner("xpath/xpath_evaluator.html", .{});
}

View File

@@ -0,0 +1,105 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! WHATWG `XPathExpression` — a parsed XPath expression cached for
//! repeated evaluation. The parsed AST lives in this object's per-
//! instance arena (long-lived); each `evaluate()` call gets a fresh
//! arena for its own result data so multiple evaluations don't grow
//! the AST arena.
const std = @import("std");
const lp = @import("lightpanda");
const js = @import("../js/js.zig");
const Page = @import("../Page.zig");
const Frame = @import("../Frame.zig");
const Node = @import("Node.zig");
const XPathResult = @import("XPathResult.zig");
const xpath = struct {
const Ast = @import("../xpath/ast.zig");
const Parser = @import("../xpath/Parser.zig");
const Evaluator = @import("../xpath/Evaluator.zig");
};
const Allocator = std.mem.Allocator;
const XPathExpression = @This();
// Reference count driven by `acquireRef` / `releaseRef`.
_rc: lp.RC(u8) = .{},
// Arena obtained in `init`; it holds this struct, the duplicated expression
// text and the parsed AST, and is handed back in `deinit`.
_arena: Allocator,
// Root of the parsed expression, evaluated on every `evaluate()` call.
_expr: *const xpath.Ast.Expr,
/// Parses `expression` once and returns an `XPathExpression` that owns the
/// resulting AST.
///
/// A dedicated arena is taken from `frame`; it is released again if
/// duplication, parsing or allocation fails.
pub fn init(expression: []const u8, frame: *Frame) !*XPathExpression {
    const arena = try frame.getArena(.tiny, "XPathExpression");
    errdefer frame.releaseArena(arena);

    // The AST keeps slices into the text it was parsed from (literals,
    // names, var refs, function names). `expression` is materialized in the
    // JS call_arena and reclaimed when the top-level call returns, so parse
    // a copy that lives in our own long-lived arena.
    const source_copy = try arena.dupe(u8, expression);
    const parsed = try xpath.Parser.parse(arena, source_copy);

    const instance = try arena.create(XPathExpression);
    instance.* = XPathExpression{ ._arena = arena, ._expr = parsed };
    return instance;
}
/// Evaluates the cached AST against `context_node` and wraps the outcome in
/// a new `XPathResult`.
///
/// Each call takes its own arena for result data, so the AST arena does not
/// grow. The `result` reuse parameter is accepted and ignored: a fresh
/// `XPathResult` is always allocated. A missing `requested_type` is treated
/// as `XPathResult.ANY_TYPE`.
pub fn evaluate(
    self: *XPathExpression,
    context_node: *Node,
    requested_type: ?u16,
    result: ?*XPathResult,
    frame: *Frame,
) !*XPathResult {
    _ = result;

    const result_arena = try frame.getArena(.medium, "XPathResult");
    errdefer frame.releaseArena(result_arena);

    const evaluated = try xpath.Evaluator.evaluate(result_arena, self._expr, context_node, frame);
    const wanted_type: u16 = requested_type orelse XPathResult.ANY_TYPE;
    return XPathResult.fromResult(result_arena, wanted_type, evaluated);
}
/// Hands the per-instance arena (which also holds `self`) back to the page.
pub fn deinit(self: *XPathExpression, page: *Page) void {
    const owned_arena = self._arena;
    page.releaseArena(owned_arena);
}
/// Takes one additional reference on this expression.
pub fn acquireRef(self: *XPathExpression) void {
    const rc = &self._rc;
    rc.acquire();
}
/// Drops one reference; the counter is given `self` and `page` so it can
/// tear the expression down when appropriate.
pub fn releaseRef(self: *XPathExpression, page: *Page) void {
    const rc = &self._rc;
    rc.release(self, page);
}
// JS bridge declarations for `XPathExpression`. No constructor is declared
// here; within this section, instances come from
// `XPathEvaluator.createExpression`.
pub const JsApi = struct {
pub const bridge = js.Bridge(XPathExpression);
pub const Meta = struct {
// Name of the class as seen from JS.
pub const name = "XPathExpression";
pub const prototype_chain = bridge.prototypeChain();
pub var class_id: bridge.ClassId = undefined;
};
// The only scriptable member: `expression.evaluate(contextNode, type, result)`.
pub const evaluate = bridge.function(XPathExpression.evaluate, .{ .dom_exception = true });
};

View File

@@ -0,0 +1,288 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! WHATWG `XPathResult` (full surface, all 10 type constants — decision
//! #4). Wraps the evaluator's `result.Result` for JS consumption:
//! coerces to the requested result type at construction, exposes the
//! type-tagged accessors, and serves the iterator/snapshot APIs.
//!
//! Lifetime model: each `XPathResult` owns a per-instance arena
//! (`getArena(.medium, ...)`) that holds both the struct and the result
//! data (node-set slice, formatted strings). The arena is released in
//! `deinit` once the JS wrapper's refcount hits zero.
//!
//! Type-mismatch accessor calls return `error.InvalidStateError` —
//! translated to a `DOMException` by `bridge.function(.., .{
//! .dom_exception = true })`. The WHATWG IDL technically specifies
//! `TypeError` for type mismatches, but `InvalidStateError` is what
//! decision #4 captures and what most legacy XPath consumers expect.
const std = @import("std");
const lp = @import("lightpanda");
const js = @import("../js/js.zig");
const Page = @import("../Page.zig");
const Frame = @import("../Frame.zig");
const Node = @import("Node.zig");
// XPath runtime helpers. Aliased to keep the cross-directory imports
// readable when both modules expose a `Result` type.
const xpath = struct {
const result = @import("../xpath/result.zig");
const Parser = @import("../xpath/Parser.zig");
const Evaluator = @import("../xpath/Evaluator.zig");
};
const Allocator = std.mem.Allocator;
const XPathResult = @This();
// WHATWG type constants. ANY_TYPE is a request flag — at construction
// it resolves to one of the four concrete categories (NUMBER, STRING,
// BOOLEAN, UNORDERED_NODE_ITERATOR) depending on what the expression
// produced.
pub const ANY_TYPE: u16 = 0;
pub const NUMBER_TYPE: u16 = 1;
pub const STRING_TYPE: u16 = 2;
pub const BOOLEAN_TYPE: u16 = 3;
pub const UNORDERED_NODE_ITERATOR_TYPE: u16 = 4;
pub const ORDERED_NODE_ITERATOR_TYPE: u16 = 5;
pub const UNORDERED_NODE_SNAPSHOT_TYPE: u16 = 6;
pub const ORDERED_NODE_SNAPSHOT_TYPE: u16 = 7;
pub const ANY_UNORDERED_NODE_TYPE: u16 = 8;
pub const FIRST_ORDERED_NODE_TYPE: u16 = 9;
// Payload stored in an `XPathResult` after coercion to the final result
// type. `fromResult` picks the variant: scalar result types map to the
// matching scalar variant and every node-set result type maps to `nodes`.
const Value = union(enum) {
number: f64,
string: []const u8,
boolean: bool,
// Backing slice for the iterator, snapshot and single-node accessors.
nodes: []const *Node,
};
// Reference count driven by `acquireRef` / `releaseRef`.
_rc: lp.RC(u8) = .{},
// Per-instance arena holding this struct and its result data; released in
// `deinit`.
_arena: Allocator,
// Concrete result type (never ANY_TYPE — `fromResult` resolves it).
_type: u16,
// Coerced result payload.
_value: Value,
// Index of the node the next `iterateNext()` call returns.
_iter_pos: usize = 0,
// ----- constructors -----
/// One-shot path used by `Document.evaluate` and `XPathEvaluator.evaluate`:
/// parse `expression`, evaluate it against `context_node`, and wrap the
/// outcome as `requested_type`.
///
/// A single arena taken from `frame` holds the AST, the result data and the
/// returned struct; it is released again if any step fails.
pub fn fromExpression(
    expression: []const u8,
    context_node: *Node,
    requested_type: u16,
    frame: *Frame,
) !*XPathResult {
    const arena = try frame.getArena(.medium, "XPathResult");
    errdefer frame.releaseArena(arena);

    // The AST keeps slices into the text it was parsed from (literals,
    // names, var refs, function names). `expression` is materialized in the
    // JS call_arena and reclaimed when the top-level call returns, so parse
    // a copy that lives in our own long-lived arena.
    const source_copy = try arena.dupe(u8, expression);
    const ast_root = try xpath.Parser.parse(arena, source_copy);
    const evaluated = try xpath.Evaluator.evaluate(arena, ast_root, context_node, frame);

    return fromResult(arena, requested_type, evaluated);
}
/// Wraps an already-evaluated `xpath.result.Result` in an `XPathResult`
/// allocated from `arena`. On success the returned object owns `arena` and
/// releases it in `deinit`. Used by `XPathExpression.evaluate`, which keeps
/// its own AST cache and only allocates a fresh result arena.
///
/// Coercion rules:
///   * `ANY_TYPE` keeps the evaluator's own category and reports it as
///     NUMBER / STRING / BOOLEAN / UNORDERED_NODE_ITERATOR.
///   * NUMBER / STRING / BOOLEAN convert via the `xpath.result` helpers.
///   * Any node-set type requires a node-set result.
///
/// Returns `error.InvalidStateError` for an unknown `requested_type` or when
/// a node-set type is requested for a non-node-set result.
pub fn fromResult(
    arena: Allocator,
    requested_type: u16,
    result: xpath.result.Result,
) !*XPathResult {
    var value: Value = undefined;
    var final_type: u16 = requested_type;

    switch (requested_type) {
        ANY_TYPE => switch (result) {
            .number => |n| {
                value = .{ .number = n };
                final_type = NUMBER_TYPE;
            },
            .string => |s| {
                value = .{ .string = s };
                final_type = STRING_TYPE;
            },
            .boolean => |b| {
                value = .{ .boolean = b };
                final_type = BOOLEAN_TYPE;
            },
            .node_set => |ns| {
                value = .{ .nodes = ns };
                final_type = UNORDERED_NODE_ITERATOR_TYPE;
            },
        },
        NUMBER_TYPE => value = .{ .number = try xpath.result.toNumber(arena, result) },
        STRING_TYPE => value = .{ .string = try xpath.result.toString(arena, result) },
        BOOLEAN_TYPE => value = .{ .boolean = xpath.result.toBoolean(result) },
        UNORDERED_NODE_ITERATOR_TYPE,
        ORDERED_NODE_ITERATOR_TYPE,
        UNORDERED_NODE_SNAPSHOT_TYPE,
        ORDERED_NODE_SNAPSHOT_TYPE,
        ANY_UNORDERED_NODE_TYPE,
        FIRST_ORDERED_NODE_TYPE,
        => {
            // Node-set type requested for a scalar expression. WHATWG says
            // TypeError, but DOMException.fromError has no TypeError mapping
            // (it would surface as a plain JS Error), so the project unifies
            // on InvalidStateError.
            if (result != .node_set) return error.InvalidStateError;
            value = .{ .nodes = result.node_set };
        },
        else => return error.InvalidStateError,
    }

    const wrapped = try arena.create(XPathResult);
    wrapped.* = .{
        ._arena = arena,
        ._type = final_type,
        ._value = value,
    };
    return wrapped;
}
// ----- lifecycle -----
/// Hands the per-instance arena (which also holds `self`) back to the page.
pub fn deinit(self: *XPathResult, page: *Page) void {
    const owned_arena = self._arena;
    page.releaseArena(owned_arena);
}
/// Takes one additional reference on this result.
pub fn acquireRef(self: *XPathResult) void {
    const rc = &self._rc;
    rc.acquire();
}
/// Drops one reference; the counter is given `self` and `page` so it can
/// tear the result down when appropriate.
pub fn releaseRef(self: *XPathResult, page: *Page) void {
    const rc = &self._rc;
    rc.release(self, page);
}
// ----- accessors -----
// Returns the concrete result type resolved at construction.
fn getResultType(self: *const XPathResult) u16 {
    const resolved: u16 = self._type;
    return resolved;
}
// `numberValue` getter: only valid on a NUMBER_TYPE result, otherwise
// fails with `error.InvalidStateError`.
fn getNumberValue(self: *const XPathResult) !f64 {
    if (self._type == NUMBER_TYPE) return self._value.number;
    return error.InvalidStateError;
}
// `stringValue` getter: only valid on a STRING_TYPE result, otherwise
// fails with `error.InvalidStateError`.
fn getStringValue(self: *const XPathResult) ![]const u8 {
    if (self._type == STRING_TYPE) return self._value.string;
    return error.InvalidStateError;
}
// `booleanValue` getter: only valid on a BOOLEAN_TYPE result, otherwise
// fails with `error.InvalidStateError`.
fn getBooleanValue(self: *const XPathResult) !bool {
    if (self._type == BOOLEAN_TYPE) return self._value.boolean;
    return error.InvalidStateError;
}
// `singleNodeValue` getter: valid for ANY_UNORDERED_NODE_TYPE and
// FIRST_ORDERED_NODE_TYPE. Yields the first stored node, or null when the
// node-set is empty; any other result type is `error.InvalidStateError`.
fn getSingleNodeValue(self: *const XPathResult) !?*Node {
    switch (self._type) {
        ANY_UNORDERED_NODE_TYPE, FIRST_ORDERED_NODE_TYPE => {},
        else => return error.InvalidStateError,
    }
    const nodes = self._value.nodes;
    if (nodes.len > 0) return nodes[0];
    return null;
}
// `snapshotLength` getter: number of nodes in a snapshot result. Any other
// result type is `error.InvalidStateError`.
fn getSnapshotLength(self: *const XPathResult) !u32 {
    switch (self._type) {
        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
        else => return error.InvalidStateError,
    }
    const count: u32 = @intCast(self._value.nodes.len);
    return count;
}
/// `invalidIteratorState` getter. The result holds a frozen slice of node
/// pointers and no DOM-mutation tracking is implemented, so the iterator is
/// never reported as invalidated: this always answers false (snapshot-only,
/// like the polyfill).
fn getInvalidIteratorState(_: *const XPathResult) bool {
    const invalidated = false;
    return invalidated;
}
// ----- methods -----
/// Returns the next node of an iterator-type result and advances the
/// cursor, or null once every node has been handed out. Any non-iterator
/// result type is `error.InvalidStateError`.
pub fn iterateNext(self: *XPathResult) !?*Node {
    const is_iterator = self._type == UNORDERED_NODE_ITERATOR_TYPE or
        self._type == ORDERED_NODE_ITERATOR_TYPE;
    if (!is_iterator) return error.InvalidStateError;

    const nodes = self._value.nodes;
    const cursor = self._iter_pos;
    if (cursor < nodes.len) {
        self._iter_pos = cursor + 1;
        return nodes[cursor];
    }
    return null;
}
/// Returns the node at `index` of a snapshot-type result, or null when the
/// index is out of range. Any non-snapshot result type is
/// `error.InvalidStateError`.
pub fn snapshotItem(self: *const XPathResult, index: u32) !?*Node {
    switch (self._type) {
        UNORDERED_NODE_SNAPSHOT_TYPE, ORDERED_NODE_SNAPSHOT_TYPE => {},
        else => return error.InvalidStateError,
    }
    const nodes = self._value.nodes;
    if (index < nodes.len) return nodes[index];
    return null;
}
// ----- JS bridge -----
// JS bridge declarations for `XPathResult`: class metadata, the ten type
// constants, the read-only accessors and the two traversal methods. No
// constructor is declared; within this section instances are created by
// `fromExpression` / `fromResult`.
pub const JsApi = struct {
pub const bridge = js.Bridge(XPathResult);
pub const Meta = struct {
// Name of the class as seen from JS.
pub const name = "XPathResult";
pub const prototype_chain = bridge.prototypeChain();
pub var class_id: bridge.ClassId = undefined;
};
// Type constants — both static (on the constructor) and instance
// properties per the WHATWG IDL. `template = true` makes them
// class-level so `XPathResult.ORDERED_NODE_SNAPSHOT_TYPE` works.
pub const ANY_TYPE = bridge.property(XPathResult.ANY_TYPE, .{ .template = true });
pub const NUMBER_TYPE = bridge.property(XPathResult.NUMBER_TYPE, .{ .template = true });
pub const STRING_TYPE = bridge.property(XPathResult.STRING_TYPE, .{ .template = true });
pub const BOOLEAN_TYPE = bridge.property(XPathResult.BOOLEAN_TYPE, .{ .template = true });
pub const UNORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.UNORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
pub const ORDERED_NODE_ITERATOR_TYPE = bridge.property(XPathResult.ORDERED_NODE_ITERATOR_TYPE, .{ .template = true });
pub const UNORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
pub const ORDERED_NODE_SNAPSHOT_TYPE = bridge.property(XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, .{ .template = true });
pub const ANY_UNORDERED_NODE_TYPE = bridge.property(XPathResult.ANY_UNORDERED_NODE_TYPE, .{ .template = true });
pub const FIRST_ORDERED_NODE_TYPE = bridge.property(XPathResult.FIRST_ORDERED_NODE_TYPE, .{ .template = true });
// Read-only accessors (setter slot is null). Those whose getter can return
// `error.InvalidStateError` on a type mismatch set `.dom_exception = true`
// so the error is translated to a DOMException; `resultType` and
// `invalidIteratorState` cannot fail.
pub const resultType = bridge.accessor(XPathResult.getResultType, null, .{});
pub const numberValue = bridge.accessor(XPathResult.getNumberValue, null, .{ .dom_exception = true });
pub const stringValue = bridge.accessor(XPathResult.getStringValue, null, .{ .dom_exception = true });
pub const booleanValue = bridge.accessor(XPathResult.getBooleanValue, null, .{ .dom_exception = true });
pub const singleNodeValue = bridge.accessor(XPathResult.getSingleNodeValue, null, .{ .dom_exception = true });
pub const snapshotLength = bridge.accessor(XPathResult.getSnapshotLength, null, .{ .dom_exception = true });
pub const invalidIteratorState = bridge.accessor(XPathResult.getInvalidIteratorState, null, .{});
// Traversal methods for iterator and snapshot results respectively.
pub const iterateNext = bridge.function(XPathResult.iterateNext, .{ .dom_exception = true });
pub const snapshotItem = bridge.function(XPathResult.snapshotItem, .{ .dom_exception = true });
};
const testing = @import("../../testing.zig");
// Runs the `xpath/xpath_result.html` fixture through the shared HTML test
// runner.
test "WebApi: XPathResult" {
try testing.htmlRunner("xpath/xpath_result.html", .{});
}
// Runs the `xpath/xpath_conformance.html` fixture through the shared HTML
// test runner.
test "WebApi: XPath conformance" {
try testing.htmlRunner("xpath/xpath_conformance.html", .{});
}
// This uses console.warn, uncomment if you want to run it
// test "WebApi: XPath perf" {
// try testing.htmlRunner("xpath/xpath_perf.html", .{});
// }

View File

@@ -0,0 +1,987 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! XPath 1.0 evaluator — runs an `ast.Expr` against a context node and
//! produces a `Result`. The evaluator allocates intermediate values
//! (node-set slices, formatted numbers, materialized attribute nodes)
//! into the caller's arena. The context `Frame` is needed for
//! `getElementById` and to materialize attributes (the attribute axis
//! returns full `Attribute` nodes so the result is `*Node`-uniform).
//!
//! Document-order sort happens once at the public boundary
//! (`evaluate()`); intermediate step results stay in axis order so
//! reverse-axis positional predicates evaluate against proximity.
const std = @import("std");
const lp = @import("lightpanda");
const Node = @import("../webapi/Node.zig");
const ast = @import("ast.zig");
const Parser = @import("Parser.zig");
const result = @import("result.zig");
const functions = @import("functions.zig");
const Frame = lp.Frame;
const Element = Node.Element;
const Document = Node.Document;
const Allocator = std.mem.Allocator;
const Evaluator = @This();
// Every error the evaluator can return. The explicit set is what the
// mutually recursive eval functions use as their declared error type.
pub const Error = error{
OutOfMemory,
WriteFailed,
// Surfaces from Attribute materialization (`Entry.toAttribute` →
// `String.dupe` enforces a length limit). The polyfill never hits
// this since JS strings are unbounded, but Lightpanda's `String`
// type caps at u32::MAX bytes — propagate so callers can surface
// a DOM exception.
StringTooLarge,
// NOTE(review): presumably raised by function-call evaluation for an
// unrecognized function name — the raising code is outside this section.
UnknownFunction,
// NOTE(review): presumably raised when a `|` operand is not a node-set —
// the raising code is outside this section.
UnionRequiresNodeSets,
};
// Caller-supplied allocator for all intermediate values (node-set slices,
// lowered names, ...).
arena: Allocator,
// Frame used for owner-document lookup, id lookup and attribute
// materialization.
frame: *Frame,
/// Public entry point: evaluates `expr` with `context_node` as the context
/// (position 1 of 1) and returns the resulting value. Intermediate data is
/// allocated from `arena`.
///
/// A node-set result is sorted into document order before being returned,
/// per XPath spec §3.3; scalar results are returned untouched.
pub fn evaluate(arena: Allocator, expr: *const ast.Expr, context_node: *Node, frame: *Frame) Error!result.Result {
    var evaluator: Evaluator = .{ .arena = arena, .frame = frame };
    const value = try evaluator.evalExpr(expr, context_node, 1, 1);
    switch (value) {
        .node_set => |nodes| sortDocOrder(@constCast(nodes)),
        else => {},
    }
    return value;
}
// Error set of `searchAll`: evaluation errors plus parse errors.
pub const SearchError = Error || Parser.Error;
/// Convenience for `DOM.performSearch`: parses `expression`, evaluates it
/// with `root` as context, and returns the matching nodes.
///
/// A top-level scalar expression yields an empty slice (decision #3 — this
/// API is for finding nodes, not arbitrary computation).
pub fn searchAll(arena: Allocator, root: *Node, expression: []const u8, frame: *Frame) SearchError![]const *Node {
    const parsed = try Parser.parse(arena, expression);
    const value = try evaluate(arena, parsed, root, frame);
    if (value == .node_set) return value.node_set;
    return &.{};
}
// ----- AST evaluation -----
// Central AST dispatcher: evaluates `expr` with `ctx` as the context node
// and `pos` / `size` as the context position and size.
//
// Numbers and literals evaluate to themselves, variable references are
// stubbed to the empty string (decision #3), unary minus negates the
// numeric conversion of its operand, and every other node kind is
// delegated to its dedicated eval function.
fn evalExpr(self: *Evaluator, expr: *const ast.Expr, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    switch (expr.*) {
        .number => |value| return .{ .number = value },
        .literal => |text| return .{ .string = text },
        .var_ref => return .{ .string = "" }, // decision #3 stub
        .neg => |operand| {
            const operand_value = try self.evalExpr(operand, ctx, pos, size);
            const as_number = try result.toNumber(self.arena, operand_value);
            return .{ .number = -as_number };
        },
        .binop => |bo| return try self.evalBinop(bo, ctx, pos, size),
        // Location paths do not depend on the context position/size.
        .path => |p| return try self.evalPath(p, ctx),
        .filter_path => |fp| return try self.evalFilterPath(fp, ctx, pos, size),
        .filter => |f| return try self.evalFilter(f, ctx, pos, size),
        .fn_call => |fc| return try self.evalFnCall(fc, ctx, pos, size),
    }
}
// Evaluates a location path. The two fast paths are tried first; otherwise
// the steps are applied one after another, each step consuming the
// node-set produced by the previous one.
//
// An absolute path starts at the document: `ctx` itself when it is the
// document, else its owner document, falling back to `ctx` when there is
// no owner. A relative path starts at `ctx`.
fn evalPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!result.Result {
    if (try self.tryIdLookupFastPath(path, ctx)) |hit| return hit;
    if (try self.tryFusedDescendantFastPath(path, ctx)) |hit| return hit;

    var origin: *Node = ctx;
    if (path.absolute and ctx._type != .document) {
        if (ctx.ownerDocument(self.frame)) |owner| origin = owner.asNode();
    }

    // Seed node-set containing only the starting node.
    const seed = try self.arena.alloc(*Node, 1);
    seed[0] = origin;

    var nodes: []const *Node = seed;
    for (path.steps) |step| {
        const stepped = try self.evalStep(nodes, step);
        nodes = stepped.node_set;
    }
    return .{ .node_set = nodes };
}
// Recognize the very common `//tag[@id='x']` and `.//tag[@id='x']`
// shapes (and their wildcard `//*[@id='x']` variants) and serve them
// directly from `frame.getElementByIdFromNode`. Accepts the literal on
// either side of `=`.
//
// Returns null when the fast path does not apply (the caller then uses the
// general evaluation), otherwise the complete result for the path.
//
// Mirrors the same tradeoff `webapi/selector/List.zig:optimizeSelector`
// already makes for `querySelector(All)`: the id-map only stores the
// first element per ID in document order, so duplicate IDs (invalid
// HTML, but possible) yield one match here where a strict tree walk
// would find all. Acceptable because Capybara/Selenium hot paths
// assume unique IDs and CSS has shipped this compromise for years.
//
// Falls through to the general path for any deviation: extra steps,
// extra predicates, non-eq predicate, non-literal RHS, an empty id
// literal, an id hit that is the search root itself, or the inability to
// resolve a search root.
fn tryIdLookupFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
    // Two acceptable AST shapes:
    //   //tag[@id='x']    parses to: ds::node() / child::tag[pred]
    //   .//tag[@id='x']   parses to: self::node() / ds::node() / child::tag[pred]
    const target = matchDescendantPathShape(path) orelse return null;
    if (target.axis != .child) return null;
    if (target.predicates.len != 1) return null;

    // Tag name (null = wildcard "*"). type_test (e.g. `node()`,
    // `text()`) doesn't qualify because getElementByIdFromNode only
    // returns elements.
    const tag_name: ?[]const u8 = switch (target.node_test) {
        .name => |n| if (std.mem.eql(u8, n, "*")) null else n,
        .type_test => return null,
    };

    const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null;

    // `@id=''` matches elements that carry an empty id attribute, which an
    // id index cannot be relied on to answer. Let the general path decide.
    if (id_value.len == 0) return null;

    // Resolve search root the same way the general path does.
    const search_root: *Node = if (path.absolute) blk: {
        if (ctx._type == .document) break :blk ctx;
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        break :blk owner.asNode();
    } else ctx;

    const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse {
        return .{ .node_set = &.{} };
    };
    const id_node = id_element.asNode();

    // `descendant-or-self::node()/child::tag` only ever selects strict
    // descendants of the search root. If the id hit IS the search root it
    // must not be returned; a (duplicate-id) descendant could still match,
    // so hand over to the general path instead of answering here.
    if (search_root == id_node) return null;

    // Relative paths must filter to descendants of the context.
    // getElementByIdFromNode is doc-wide.
    if (!search_root.contains(id_node)) {
        return .{ .node_set = &.{} };
    }

    // Tag check (case-insensitive per decision #2). Element tag names
    // are stored lowercase via `getTagNameLower`; lowercase the AST
    // name once and compare.
    if (tag_name) |tag| {
        const lowered = try std.ascii.allocLowerString(self.arena, tag);
        if (!std.mem.eql(u8, lowered, id_element.getTagNameLower())) {
            return .{ .node_set = &.{} };
        }
    }

    const out = try self.arena.alloc(*Node, 1);
    out[0] = id_node;
    return .{ .node_set = out };
}
// Generalization of `tryIdLookupFastPath` to non-ID predicates. Accepts the
// same AST shape (`//<test>[preds]` / `.//<test>[preds]`) but answers it
// with a single document-order walk over the descendants of the search
// root, applying the node test and the predicates inline. This avoids the
// general path's per-step axis materialization, its per-step
// `filtered`/`current` lists and the dedup hash map (a single-context
// forward walk already yields document order without duplicates).
//
// Targets the bulk of the benchmark's remaining cost: `//div`, `//*`,
// `//*[@class='x']`, `//div[@class='x']`, `//div[contains(@class,'x')]`.
//
// Only "safe" predicates are accepted: not numeric at the top level
// (number, neg, arithmetic binop, or a fn-call returning a number) and free
// of `position()`/`last()` anywhere in the predicate AST. Numeric
// predicates need a context position the fused walk does not track; a
// `position()`/`last()` inside a sub-path's own step is rejected
// conservatively even though it is local to that sub-axis.
//
// Returns null when the fast path does not apply.
fn tryFusedDescendantFastPath(self: *Evaluator, path: ast.Path, ctx: *Node) Error!?result.Result {
    const target = matchDescendantPathShape(path) orelse return null;
    if (target.axis != .child) return null;

    for (target.predicates) |pred| {
        if (!isSafeNonPositionalPredicate(pred)) return null;
    }

    // Lowercase the name test once; null means "no name constraint".
    var lowered_name: ?[]const u8 = null;
    switch (target.node_test) {
        .name => |raw| {
            if (!std.mem.eql(u8, raw, "*")) {
                lowered_name = try std.ascii.allocLowerString(self.arena, raw);
            }
        },
        .type_test => {},
    }

    var search_root: *Node = ctx;
    if (path.absolute and ctx._type != .document) {
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        search_root = owner.asNode();
    }

    var matches: std.ArrayList(*Node) = .empty;
    try self.fusedDescend(search_root, target, lowered_name, &matches);
    return .{ .node_set = matches.items };
}
// Pre-order walk below `parent` for the fused fast path. Every descendant
// that passes `target`'s node test and all of its predicates is appended to
// `out`; the walk recurses into every child regardless of whether the
// child itself matched.
fn fusedDescend(
    self: *Evaluator,
    parent: *Node,
    target: ast.Step,
    lowered_name: ?[]const u8,
    out: *std.ArrayList(*Node),
) Error!void {
    var children = parent.childrenIterator();
    while (children.next()) |child| {
        if (matchTest(child, target.node_test, target.axis, lowered_name)) {
            // Position / size are synthetic (1, 1). That is safe because
            // the predicate-safety gate already rejected any expression
            // that depends on either. Stops at the first failing predicate.
            const accepted = for (target.predicates) |pred| {
                const verdict = try self.evalExpr(pred, child, 1, 1);
                if (!result.toBoolean(verdict)) break false;
            } else true;

            if (accepted) try out.append(self.arena, child);
        }
        try self.fusedDescend(child, target, lowered_name, out);
    }
}
// Recognizes the two step layouts the fast paths understand and returns
// the final (target) step, or null for any other layout:
//   2 steps: descendant-or-self::node() / <target>
//   3 steps: self::node() / descendant-or-self::node() / <target>
fn matchDescendantPathShape(path: ast.Path) ?ast.Step {
    const steps = path.steps;
    if (steps.len == 2) {
        if (isDescendantOrSelfNode(steps[0])) return steps[1];
        return null;
    }
    if (steps.len == 3) {
        if (!isSelfNode(steps[0])) return null;
        if (!isDescendantOrSelfNode(steps[1])) return null;
        return steps[2];
    }
    return null;
}
// A predicate is safe for the fused walk when it is neither numeric at the
// top level nor references `position()` / `last()` anywhere inside.
fn isSafeNonPositionalPredicate(expr: *const ast.Expr) bool {
    return !isNumericTopLevel(expr) and !containsPositionOrLast(expr);
}
// Conservative static check: can `expr` evaluate to a number, which as a
// predicate would mean "position() = <number>"?
//
// True for number literals, unary minus, arithmetic binops and calls to
// number-returning functions. Filter expressions are looked through as
// well: `evalFilter` / `evalFilterPath` return a non-node-set base value
// unchanged, so a filter wrapped around a numeric expression still yields
// a number.
fn isNumericTopLevel(expr: *const ast.Expr) bool {
    return switch (expr.*) {
        .number, .neg => true,
        .binop => |bo| switch (bo.op) {
            .add, .sub, .mul, .div, .mod => true,
            else => false,
        },
        .fn_call => |fc| isNumericFnName(fc.name),
        .filter => |f| isNumericTopLevel(f.expr),
        .filter_path => |fp| isNumericTopLevel(fp.filter),
        else => false,
    };
}
// True when `name` is one of the functions treated as returning a number.
// The comparison is exact (case-sensitive).
fn isNumericFnName(name: []const u8) bool {
    inline for (.{
        "position", "last",    "count", "sum",
        "floor",    "ceiling", "round", "number",
        "string-length",
    }) |candidate| {
        if (std.mem.eql(u8, name, candidate)) return true;
    }
    return false;
}
// Recursively scans `expr` for a call to `position()` or `last()`,
// including inside operands, filter bases, predicates of path steps and
// function arguments.
fn containsPositionOrLast(expr: *const ast.Expr) bool {
    switch (expr.*) {
        .number, .literal, .var_ref => return false,
        .neg => |operand| return containsPositionOrLast(operand),
        .binop => |bo| {
            if (containsPositionOrLast(bo.left)) return true;
            return containsPositionOrLast(bo.right);
        },
        .filter => |f| {
            if (containsPositionOrLast(f.expr)) return true;
            return containsPositionOrLast(f.predicate);
        },
        .filter_path => |fp| {
            if (containsPositionOrLast(fp.filter)) return true;
            return stepsContainPositionOrLast(fp.steps);
        },
        .path => |p| return stepsContainPositionOrLast(p.steps),
        .fn_call => |fc| {
            if (std.mem.eql(u8, fc.name, "position")) return true;
            if (std.mem.eql(u8, fc.name, "last")) return true;
            return argsContainPositionOrLast(fc.args);
        },
    }
}
// True when any predicate of any step references `position()` / `last()`.
fn stepsContainPositionOrLast(steps: []const ast.Step) bool {
    for (steps) |step| {
        const positional = for (step.predicates) |pred| {
            if (containsPositionOrLast(pred)) break true;
        } else false;
        if (positional) return true;
    }
    return false;
}
// True when any function argument references `position()` / `last()`.
fn argsContainPositionOrLast(args: []const *ast.Expr) bool {
    var i: usize = 0;
    while (i < args.len) : (i += 1) {
        if (containsPositionOrLast(args[i])) return true;
    }
    return false;
}
// True for exactly `descendant-or-self::node()` with no predicates.
fn isDescendantOrSelfNode(s: ast.Step) bool {
    const bare_axis = s.axis == .descendant_or_self and s.predicates.len == 0;
    if (!bare_axis) return false;
    switch (s.node_test) {
        .type_test => |kind| return kind == .node,
        .name => return false,
    }
}
// True for exactly `self::node()` with no predicates.
fn isSelfNode(s: ast.Step) bool {
    const bare_axis = s.axis == .self and s.predicates.len == 0;
    if (!bare_axis) return false;
    switch (s.node_test) {
        .type_test => |kind| return kind == .node,
        .name => return false,
    }
}
// Matches `@attr_name = 'literal'` with the literal on either side of `=`
// and returns the literal text; null for any other expression.
fn matchAttrEqLiteral(expr: *const ast.Expr, attr_name: []const u8) ?[]const u8 {
    const cmp = switch (expr.*) {
        .binop => |bo| bo,
        else => return null,
    };
    if (cmp.op != .eq) return null;

    // Attribute on the left, literal on the right.
    if (isAttrPath(cmp.left, attr_name) and cmp.right.* == .literal) {
        return cmp.right.literal;
    }
    // Literal on the left, attribute on the right.
    if (isAttrPath(cmp.right, attr_name) and cmp.left.* == .literal) {
        return cmp.left.literal;
    }
    return null;
}
// True when `expr` is the relative single-step path `@attr_name`: attribute
// axis, a name test equal to `attr_name` (exact comparison), no predicates.
fn isAttrPath(expr: *const ast.Expr, attr_name: []const u8) bool {
    const p = switch (expr.*) {
        .path => |inner| inner,
        else => return false,
    };
    if (p.absolute or p.steps.len != 1) return false;

    const step = p.steps[0];
    if (step.axis != .attribute or step.predicates.len != 0) return false;

    switch (step.node_test) {
        .name => |n| return std.mem.eql(u8, n, attr_name),
        .type_test => return false,
    }
}
// Evaluates `<filter-expr>/step/step...`: the filter expression supplies
// the starting node-set and each step is applied in turn. A base that is
// not a node-set is returned unchanged without applying any step.
fn evalFilterPath(self: *Evaluator, fp: ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const base = try self.evalExpr(fp.filter, ctx, pos, size);
    var nodes: []const *Node = switch (base) {
        .node_set => |ns| ns,
        else => return base,
    };
    for (fp.steps) |step| {
        const stepped = try self.evalStep(nodes, step);
        nodes = stepped.node_set;
    }
    return .{ .node_set = nodes };
}
// Evaluates a filter expression `<primary>[predicate]`. A base that is not
// a node-set is returned unchanged.
//
// XPath 1.0 §2.4: a predicate attached to a filter expression filters
// "with respect to the child axis", i.e. context positions are counted in
// document order — unlike a step predicate on a reverse axis, which counts
// by proximity. Intermediate node-sets in this evaluator stay in
// axis/insertion order, so the base is put into document order before
// positions are assigned; otherwise `(ancestor::*)[1]` would select the
// nearest ancestor instead of the outermost one.
fn evalFilter(self: *Evaluator, f: ast.Filter, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const base = try self.evalExpr(f.expr, ctx, pos, size);
    if (base != .node_set) return base;

    var candidates: []const *Node = base.node_set;
    if (candidates.len > 1) {
        // Sort a private copy so the base slice is never mutated here.
        const ordered = try self.arena.dupe(*Node, candidates);
        sortDocOrder(ordered);
        candidates = ordered;
    }

    var out: std.ArrayList(*Node) = .empty;
    const sz = candidates.len;
    for (candidates, 0..) |n, idx| {
        const k = idx + 1; // 1-based context position
        const val = try self.evalExpr(f.predicate, n, k, sz);
        if (predicateMatches(val, k)) try out.append(self.arena, n);
    }
    return .{ .node_set = out.items };
}
// ----- step + axis -----
// Applies one location step to every node of `ctx_nodes` and returns the
// union of the per-context results, de-duplicated in first-seen order.
//
// For each context node: collect the axis nodes, keep those passing the
// node test, then run the predicates left to right. Each predicate sees
// the survivors of the previous one, with 1-based positions in axis order
// and the size of that surviving list.
fn evalStep(self: *Evaluator, ctx_nodes: []const *Node, step: ast.Step) Error!result.Result {
    var seen: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;

    // Lowercase the name test once per step. matchNameTest matches
    // case-insensitively (decision #2); hoisting this spares every axis
    // node the per-byte case-fold inside `eqlIgnoreCase`.
    var lowered_name: ?[]const u8 = null;
    switch (step.node_test) {
        .name => |raw| {
            if (!std.mem.eql(u8, raw, "*")) {
                lowered_name = try std.ascii.allocLowerString(self.arena, raw);
            }
        },
        .type_test => {},
    }

    for (ctx_nodes) |ctx| {
        const axis_nodes = try self.axisNodes(ctx, step.axis);

        var survivors: std.ArrayList(*Node) = .empty;
        for (axis_nodes) |candidate| {
            if (!matchTest(candidate, step.node_test, step.axis, lowered_name)) continue;
            try survivors.append(self.arena, candidate);
        }

        var working: []const *Node = survivors.items;
        for (step.predicates) |pred| {
            const total = working.len;
            var kept: std.ArrayList(*Node) = .empty;
            var position: usize = 0;
            for (working) |candidate| {
                position += 1;
                const verdict = try self.evalExpr(pred, candidate, position, total);
                if (predicateMatches(verdict, position)) try kept.append(self.arena, candidate);
            }
            working = kept.items;
        }

        for (working) |candidate| {
            try seen.put(self.arena, candidate, {});
        }
    }
    return .{ .node_set = seen.keys() };
}
// Materializes the nodes on `axis` relative to `node`, in axis order:
// forward axes in document order, reverse axes nearest-first (the final
// node-set is sorted to document order at the public boundary). The
// namespace and unknown axes are decision #3 stubs and yield nothing.
fn axisNodes(self: *Evaluator, node: *Node, axis: ast.Axis) Error![]const *Node {
    const arena = self.arena;
    var collected: std.ArrayList(*Node) = .empty;

    switch (axis) {
        .self => try collected.append(arena, node),
        .child => {
            var children = node.childrenIterator();
            while (children.next()) |child| try collected.append(arena, child);
        },
        .descendant, .descendant_or_self => {
            if (axis == .descendant_or_self) try collected.append(arena, node);
            try self.appendDescendants(node, &collected);
        },
        .parent => {
            if (node.parentNode()) |parent| try collected.append(arena, parent);
        },
        // Reverse axes — proximity order (nearest first).
        .ancestor, .ancestor_or_self => {
            if (axis == .ancestor_or_self) try collected.append(arena, node);
            var cursor = node.parentNode();
            while (cursor) |ancestor| {
                try collected.append(arena, ancestor);
                cursor = ancestor.parentNode();
            }
        },
        .following_sibling => {
            var cursor = node.nextSibling();
            while (cursor) |sibling| {
                try collected.append(arena, sibling);
                cursor = sibling.nextSibling();
            }
        },
        .preceding_sibling => {
            var cursor = node.previousSibling();
            while (cursor) |sibling| {
                try collected.append(arena, sibling);
                cursor = sibling.previousSibling();
            }
        },
        .following => try self.appendFollowing(node, &collected),
        .preceding => try self.appendPreceding(node, &collected),
        .attribute => try self.appendAttributes(node, &collected),
        .namespace, .unknown => {}, // decision #3 stubs
    }
    return collected.items;
}
// Appends every descendant of `node` (not `node` itself) to `out` in
// pre-order: each child, immediately followed by that child's subtree.
fn appendDescendants(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
    var children = node.childrenIterator();
    while (true) {
        const child = children.next() orelse break;
        try out.append(self.arena, child);
        try self.appendDescendants(child, out);
    }
}
// Following axis: for `start` and then each of its ancestors in turn,
// appends every later sibling together with that sibling's subtree.
// Descendants of `start` itself are never visited.
fn appendFollowing(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
    var level: ?*Node = start;
    while (level) |cur| {
        var sibling = cur.nextSibling();
        while (sibling) |next| {
            try out.append(self.arena, next);
            try self.appendDescendants(next, out);
            sibling = next.nextSibling();
        }
        level = cur.parentNode();
    }
}
/// Append the subtree rooted at `n` to `out` in reverse document order:
/// children are walked last-to-first, each child's subtree is emitted
/// recursively, and `n` itself is appended after all of them.
fn appendPrecedingSubtree(self: *Evaluator, n: *Node, out: *std.ArrayList(*Node)) Error!void {
    var cursor = n.lastChild();
    while (cursor) |child| {
        try self.appendPrecedingSubtree(child, out);
        cursor = child.previousSibling();
    }
    try out.append(self.arena, n);
}
/// Append the `preceding` axis of `start` to `out`. For `start` and then
/// each ancestor that itself has a parent, every earlier sibling's subtree
/// is emitted (nearest sibling first, each subtree in reverse document
/// order). Ancestors themselves are never appended.
fn appendPreceding(self: *Evaluator, start: *Node, out: *std.ArrayList(*Node)) Error!void {
    var cur: *Node = start;
    // A node without a parent is not processed at all, so the walk ends there.
    while (cur.parentNode()) |parent| : (cur = parent) {
        var s = cur.previousSibling();
        while (s) |sn| {
            try self.appendPrecedingSubtree(sn, out);
            s = sn.previousSibling();
        }
    }
}
/// Append one attribute node per attribute of `node` to `out`. Does
/// nothing when `node` is not an `Element`.
fn appendAttributes(self: *Evaluator, node: *Node, out: *std.ArrayList(*Node)) Error!void {
    if (node.is(Element)) |el| {
        var iter = el.attributeIterator();
        while (iter.next()) |entry| {
            // Attribute wrappers are memoized in frame._attribute_lookup, keyed
            // by the entry's address, so repeated XPath queries
            // (Capybara/Selenium polling) reuse the same *Attribute rather than
            // leaking a fresh one into page-lifetime storage on every call.
            // Same pattern as Attribute.List.getAttribute /
            // NamedNodeMap.getAtIndex.
            const key = @intFromPtr(entry);
            const gop = try self.frame._attribute_lookup.getOrPut(self.frame.arena, key);
            if (!gop.found_existing) {
                gop.value_ptr.* = try entry.toAttribute(el, self.frame);
            }
            try out.append(self.arena, gop.value_ptr.*._proto);
        }
    }
}
// ----- node test matching -----
/// Decide whether `node` satisfies the node test `test_` on `axis`.
/// Name tests are delegated to `matchNameTest`; `lowered_name` is only
/// consulted there.
fn matchTest(node: *Node, test_: ast.NodeTest, axis: ast.Axis, lowered_name: ?[]const u8) bool {
    switch (test_) {
        .name => |name| return matchNameTest(node, name, axis, lowered_name),
        .type_test => |kind| switch (kind) {
            .node => return true,
            .comment => return node.getNodeType() == 8,
            .processing_instruction => return node.getNodeType() == 7,
            // XPath 1.0 §5.7: the data model has no CDATASection node —
            // CDATA content is part of the text node value, so both the
            // Text (3) and CDATASection (4) DOM node types match `text()`.
            .text => return node.getNodeType() == 3 or node.getNodeType() == 4,
        },
    }
}
/// Match a name test (`foo` or `*`) against `node`.
///
/// On the attribute axis only attribute nodes can match; on every other
/// axis only elements can. `lowered_name` is non-null iff `name != "*"`.
/// Element tag names (`getTagNameLower`) and html5ever-stored attribute
/// names are already lowercase, so a plain `mem.eql` against the
/// pre-lowered test name replaces a per-call `eqlIgnoreCase`.
fn matchNameTest(node: *Node, name: []const u8, axis: ast.Axis, lowered_name: ?[]const u8) bool {
    const wildcard = std.mem.eql(u8, name, "*");
    if (axis == .attribute) {
        switch (node._type) {
            .attribute => |attr| return wildcard or std.mem.eql(u8, attr._name.str(), lowered_name.?),
            else => return false,
        }
    }
    if (node.is(Element)) |el| {
        return wildcard or std.mem.eql(u8, el.getTagNameLower(), lowered_name.?);
    }
    return false;
}
// ----- binop -----
/// Evaluate a binary operator node.
///
/// The left operand is always evaluated first. `or` / `and` short-circuit
/// and skip the right operand when the left one already decides the
/// result; every other operator evaluates both sides. Returns
/// `error.UnionRequiresNodeSets` when either side of `|` is not a node-set.
fn evalBinop(self: *Evaluator, bo: ast.BinOp, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const l = try self.evalExpr(bo.left, ctx, pos, size);

    // Short-circuit exits for the logical operators.
    switch (bo.op) {
        .or_ => if (result.toBoolean(l)) return .{ .boolean = true },
        .and_ => if (!result.toBoolean(l)) return .{ .boolean = false },
        else => {},
    }

    const r = try self.evalExpr(bo.right, ctx, pos, size);
    switch (bo.op) {
        // Left side did not decide: the answer is the right side's truthiness.
        .or_, .and_ => return .{ .boolean = result.toBoolean(r) },
        .eq, .neq, .lt, .gt, .lte, .gte => {
            const matched = try self.xCmp(l, r, bo.op);
            return .{ .boolean = matched };
        },
        .add, .sub, .mul, .div, .mod => {
            const ln = try result.toNumber(self.arena, l);
            const rn = try result.toNumber(self.arena, r);
            const v: f64 = switch (bo.op) {
                .add => ln + rn,
                .sub => ln - rn,
                .mul => ln * rn,
                .div => ln / rn,
                // JS `%` and Zig `@rem` agree on sign for finite values
                // and propagate NaN (XPath §3.5).
                .mod => @rem(ln, rn),
                else => unreachable,
            };
            return .{ .number = v };
        },
        .union_ => {
            if (l != .node_set or r != .node_set) return error.UnionRequiresNodeSets;
            // Insertion-ordered set: duplicates across both sides collapse.
            var seen: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
            for (l.node_set) |n| try seen.put(self.arena, n, {});
            for (r.node_set) |n| try seen.put(self.arena, n, {});
            const nodes = seen.keys();
            sortDocOrder(@constCast(nodes));
            return .{ .node_set = nodes };
        },
    }
}
// ----- comparison (XPath spec §3.4) -----
/// Compare two XPath values with `op` (one of `=`, `!=`, `<`, `<=`, `>`,
/// `>=`) following XPath 1.0 §3.4.
///
/// - node-set vs node-set: true if any pair of nodes compares true, on
///   string-values for `=`/`!=` and on their numeric conversions otherwise.
/// - node-set vs boolean: the node-set is reduced to "non-empty", then the
///   two booleans are compared (as numbers for relational operators).
/// - node-set vs number/string: true if any node's string-value compares
///   true against the scalar.
/// - scalars only: `=`/`!=` compare as boolean if either side is boolean,
///   else as number if either side is a number, else as strings; relational
///   operators always compare as numbers.
///
/// Operand order is preserved, so `<` / `>` are not accidentally flipped
/// when the node-set is on the right.
fn xCmp(self: *Evaluator, left: result.Result, right: result.Result, op: ast.BinOpKind) Error!bool {
    const is_eq = (op == .eq or op == .neq);
    const l_is_set = (left == .node_set);
    const r_is_set = (right == .node_set);

    if (l_is_set and r_is_set) {
        // Cache right-side string-values once. Without this, each left node
        // would pay |right| allocations — O(N×M) for a set×set comparison
        // (e.g. `//foo = //bar` on a large page).
        const right_strings = try self.arena.alloc([]const u8, right.node_set.len);
        for (right.node_set, 0..) |r, i| {
            right_strings[i] = try result.stringValueOf(self.arena, r);
        }
        for (left.node_set) |l| {
            const lv = try result.stringValueOf(self.arena, l);
            for (right_strings) |rv| {
                const matched = if (is_eq)
                    cmpString(lv, rv, op)
                else
                    cmpNumber(result.stringToNumber(lv), result.stringToNumber(rv), op);
                if (matched) return true;
            }
        }
        return false;
    }

    if (l_is_set or r_is_set) {
        const ns = if (l_is_set) left.node_set else right.node_set;
        const other = if (l_is_set) right else left;
        const ns_left = l_is_set;

        if (other == .boolean) {
            const ns_b = ns.len > 0;
            const a, const b = if (ns_left) .{ ns_b, other.boolean } else .{ other.boolean, ns_b };
            if (is_eq) return cmpBool(a, b, op);
            // Relational operators on booleans compare their numeric values
            // (true → 1, false → 0). Routing these through cmpBool used to hit
            // its `unreachable` arm, e.g. for `//p > true()`.
            const an: f64 = if (a) 1 else 0;
            const bn: f64 = if (b) 1 else 0;
            return cmpNumber(an, bn, op);
        }

        for (ns) |n| {
            const sv = try result.stringValueOf(self.arena, n);
            const matched = switch (other) {
                .number => |num| blk: {
                    const sv_num = result.stringToNumber(sv);
                    const a, const b = if (ns_left) .{ sv_num, num } else .{ num, sv_num };
                    break :blk cmpNumber(a, b, op);
                },
                .string => |s| blk: {
                    if (is_eq) {
                        const a, const b = if (ns_left) .{ sv, s } else .{ s, sv };
                        break :blk cmpString(a, b, op);
                    }
                    // Relational comparison against a string: both sides → number.
                    const sv_num = result.stringToNumber(sv);
                    const s_num = result.stringToNumber(s);
                    const a, const b = if (ns_left) .{ sv_num, s_num } else .{ s_num, sv_num };
                    break :blk cmpNumber(a, b, op);
                },
                .boolean, .node_set => unreachable, // handled above
            };
            if (matched) return true;
        }
        return false;
    }

    // Neither is a node-set.
    if (is_eq) {
        if (left == .boolean or right == .boolean) {
            return cmpBool(result.toBoolean(left), result.toBoolean(right), op);
        }
        if (left == .number or right == .number) {
            const ln = try result.toNumber(self.arena, left);
            const rn = try result.toNumber(self.arena, right);
            return cmpNumber(ln, rn, op);
        }
        const ls = try result.toString(self.arena, left);
        const rs = try result.toString(self.arena, right);
        return cmpString(ls, rs, op);
    }

    // Non-eq with no node-set: both → number.
    const ln = try result.toNumber(self.arena, left);
    const rn = try result.toNumber(self.arena, right);
    return cmpNumber(ln, rn, op);
}
/// Byte-wise string (in)equality. Only `.eq` and `.neq` are valid here;
/// relational operators always coerce to number before comparing, so any
/// other `op` is a caller bug.
fn cmpString(a: []const u8, b: []const u8, op: ast.BinOpKind) bool {
    switch (op) {
        .eq, .neq => return std.mem.eql(u8, a, b) == (op == .eq),
        else => unreachable, // <, > etc. always coerce to number first
    }
}
/// Compare two numbers with a comparison operator.
///
/// Uses the native f64 operators, which already give the NaN semantics
/// XPath needs: NaN == X is false, NaN != X is true, and every ordering
/// comparison involving NaN is false. Non-comparison operators are a
/// caller bug.
fn cmpNumber(a: f64, b: f64, op: ast.BinOpKind) bool {
    switch (op) {
        .lt => return a < b,
        .lte => return a <= b,
        .gt => return a > b,
        .gte => return a >= b,
        .eq => return a == b,
        .neq => return a != b,
        else => unreachable,
    }
}
/// Compare two booleans with a comparison operator.
///
/// `=` / `!=` compare directly. The relational operators order the values
/// as XPath does after converting them to numbers (false → 0, true → 1),
/// so `false < true`. Previously these operators fell into `unreachable`,
/// which a relational comparison between a node-set and a boolean could
/// reach. Non-comparison operators remain a caller bug.
fn cmpBool(a: bool, b: bool, op: ast.BinOpKind) bool {
    return switch (op) {
        .eq => a == b,
        .neq => a != b,
        .lt => !a and b,
        .gt => a and !b,
        .lte => !a or b,
        .gte => a or !b,
        else => unreachable,
    };
}
// ----- function calls -----
/// Evaluate a function call node.
///
/// `position()` and `last()` are answered here because they need the
/// (pos, size) pair of the current context, which `functions.call` does
/// not receive; keeping them inline avoids threading per-call context
/// through that signature. Every other function gets its arguments
/// evaluated eagerly, left to right, and is dispatched by name.
fn evalFnCall(self: *Evaluator, fc: ast.FnCall, ctx: *Node, pos: usize, size: usize) Error!result.Result {
    const name = fc.name;
    if (std.mem.eql(u8, name, "position")) return .{ .number = @floatFromInt(pos) };
    if (std.mem.eql(u8, name, "last")) return .{ .number = @floatFromInt(size) };

    // Eager evaluation matches the polyfill's `evaluate(args[i], ...)`
    // pattern; lazy short-circuit isn't needed because `or`/`and` are
    // binops handled in evalBinop, not function calls.
    const eval_args = try self.arena.alloc(result.Result, fc.args.len);
    for (eval_args, fc.args) |*slot, arg| {
        slot.* = try self.evalExpr(arg, ctx, pos, size);
    }
    return functions.call(self.arena, name, eval_args, ctx, self.frame);
}
// ----- helpers -----
/// Decide whether a predicate result keeps the node at 1-based `position`.
///
/// A numeric result selects only the node whose position equals that
/// number, so non-integer numbers never match. Any other result is
/// converted to boolean.
fn predicateMatches(val: result.Result, position: usize) bool {
    if (val == .number) {
        const wanted: f64 = @floatFromInt(position);
        return val.number == wanted;
    }
    return result.toBoolean(val);
}
/// Sort `nodes` in place using `lessThanDocOrder` (document order).
/// Slices of zero or one node are left untouched.
pub fn sortDocOrder(nodes: []*Node) void {
    if (nodes.len > 1) {
        std.mem.sort(*Node, nodes, {}, lessThanDocOrder);
    }
}
/// `std.mem.sort` comparator: true when `a` precedes `b`, i.e. when
/// `a.compareDocumentPosition(b)` reports that `b` is FOLLOWING `a`.
/// A node never precedes itself.
fn lessThanDocOrder(_: void, a: *Node, b: *Node) bool {
    // FOLLOWING (0x04) — b comes after a in document order.
    const following_bit = 0x04;
    return a != b and (a.compareDocumentPosition(b) & following_bit) != 0;
}
// ---------------------------------------------------------------------
// Tests — pure-logic only. DOM-dependent evaluation lands as HTML
// fixtures in Phase 9 (tests/xpath/*.html); Lightpanda has no in-Zig
// way to construct a Frame + Document tree without the JS runtime.
// ---------------------------------------------------------------------
const testing = std.testing;
const Tokenizer = @import("Tokenizer.zig");
test "Evaluator: cmpNumber NaN semantics" {
    const nan = std.math.nan(f64);

    // NaN is unordered: only `!=` holds, even against itself.
    try testing.expect(!cmpNumber(nan, nan, .eq));
    try testing.expect(cmpNumber(nan, nan, .neq));
    inline for (.{ .lt, .gt, .lte, .gte }) |op| {
        try testing.expect(!cmpNumber(nan, 0, op));
    }

    // Ordinary finite values behave as usual.
    try testing.expect(cmpNumber(0, 0, .eq));
    try testing.expect(cmpNumber(1, 2, .lt));
    try testing.expect(cmpNumber(2, 1, .gt));
    inline for (.{ .lte, .gte }) |op| {
        try testing.expect(cmpNumber(1, 1, op));
    }
}
test "Evaluator: cmpString" {
    // Each row: left, right, whether the two strings are equal.
    inline for (.{
        .{ "a", "a", true },
        .{ "a", "b", false },
    }) |case| {
        try testing.expectEqual(case[2], cmpString(case[0], case[1], .eq));
        try testing.expectEqual(!case[2], cmpString(case[0], case[1], .neq));
    }
}
test "Evaluator: cmpBool" {
    // Equality holds for identical values only; inequality is its negation.
    try testing.expectEqual(true, cmpBool(true, true, .eq));
    try testing.expectEqual(false, cmpBool(true, false, .eq));
    try testing.expectEqual(true, cmpBool(true, false, .neq));
}
test "Evaluator: predicateMatches numeric vs boolean" {
    // Numbers select by position; a non-integer can never equal a position.
    try testing.expectEqual(true, predicateMatches(.{ .number = 1 }, 1));
    try testing.expectEqual(false, predicateMatches(.{ .number = 2 }, 1));
    try testing.expectEqual(false, predicateMatches(.{ .number = 1.5 }, 1));

    // Booleans pass or fail regardless of position.
    try testing.expectEqual(true, predicateMatches(.{ .boolean = true }, 7));
    try testing.expectEqual(false, predicateMatches(.{ .boolean = false }, 1));

    // Strings are truthy iff non-empty.
    try testing.expectEqual(true, predicateMatches(.{ .string = "x" }, 99));
    try testing.expectEqual(false, predicateMatches(.{ .string = "" }, 1));

    // An empty node-set is falsy.
    try testing.expectEqual(false, predicateMatches(.{ .node_set = &.{} }, 1));
}
test "Evaluator: scalar arithmetic via parsed expressions" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Frame and context node are never touched by pure-arithmetic ASTs, so
    // fabricated pointers let the binop / number paths run without a real
    // DOM. Any expression that did reach into them would crash.
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    // Each row: expression source, expected numeric result.
    inline for (.{
        .{ "1 + 2", 3 },
        .{ "5 - 3", 2 },
        .{ "4 * 2", 8 },
        .{ "10 div 4", 2.5 },
        .{ "10 mod 3", 1 },
        .{ "-5", -5 },
        .{ "1 + 2 * 3", 7 },
    }) |case| {
        const expected: f64 = case[1];
        const expr = try Parser.parse(allocator, case[0]);
        var ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
        try testing.expect(r == .number);
        try testing.expectEqual(expected, r.number);
    }
}
test "Evaluator: scalar comparison via parsed expressions" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    // Each row: expression source, expected boolean result.
    inline for (.{
        .{ "1 = 1", true },
        .{ "1 = 2", false },
        .{ "1 != 2", true },
        .{ "1 < 2", true },
        .{ "2 < 1", false },
        .{ "1 <= 1", true },
        .{ "2 >= 2", true },
        .{ "'abc' = 'abc'", true },
        .{ "'abc' != 'abd'", true },
    }) |case| {
        const expr = try Parser.parse(allocator, case[0]);
        var ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
        try testing.expect(r == .boolean);
        try testing.expectEqual(case[1], r.boolean);
    }
}
test "Evaluator: position() and last() reflect context" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    // Evaluated at position 3 of a context of size 5.
    inline for (.{
        .{ "position()", 3 },
        .{ "last()", 5 },
    }) |case| {
        const expr = try Parser.parse(allocator, case[0]);
        var ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(expr, ctx_dummy, 3, 5);
        try testing.expectEqual(@as(f64, case[1]), r.number);
    }

    // Logical short-circuit: last() never evaluates if first
    // operand is true.
    const expr = try Parser.parse(allocator, "1 = 1 or last() > 0");
    var ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
    const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
    try testing.expect(r.boolean);
}
test "Evaluator: short-circuit and/or" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    // Each row: expression source, expected truth value.
    const cases = .{
        .{ "1 = 2 or 1 = 1", true },
        .{ "1 = 1 and 1 = 2", false },
        .{ "1 = 1 and 2 = 2", true },
        .{ "1 = 2 and 1 = 1", false },
        .{ "1 = 2 or 2 = 1", false },
    };
    inline for (cases) |case| {
        const expr = try Parser.parse(allocator, case[0]);
        var ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
        const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);
        try testing.expect(r == .boolean);
        try testing.expectEqual(case[1], r.boolean);
    }
}
test "Evaluator: unary minus" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Negation applies to the whole parenthesised sum.
    const expr = try Parser.parse(allocator, "-(3 + 2)");
    const ctx_dummy: *Node = @ptrFromInt(0x2000);
    var ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
    const r = try ev.evalExpr(expr, ctx_dummy, 1, 1);

    const expected: f64 = -5;
    try testing.expectEqual(expected, r.number);
}
test "Evaluator: division by zero produces infinity / NaN per IEEE" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();
    const ctx_dummy: *Node = @ptrFromInt(0x2000);

    // Non-zero numerator: +Infinity.
    const inf_expr = try Parser.parse(allocator, "1 div 0");
    var inf_ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
    const inf_r = try inf_ev.evalExpr(inf_expr, ctx_dummy, 1, 1);
    try testing.expect(std.math.isPositiveInf(inf_r.number));

    // Zero over zero: NaN.
    const nan_expr = try Parser.parse(allocator, "0 div 0");
    var nan_ev = Evaluator{ .arena = allocator, .frame = @ptrFromInt(0x1000) };
    const nan_r = try nan_ev.evalExpr(nan_expr, ctx_dummy, 1, 1);
    try testing.expect(std.math.isNan(nan_r.number));
}
test "Evaluator: searchAll on scalar expression returns empty (decision #3)" {
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // Synthetic frame/root pointers are safe here because pure-scalar
    // expressions (binop, literal, true(), comparison) never reach into
    // the Frame or the context node. Adding a DOM-touching expression
    // (e.g. `id('x')`) to this list would crash on dereference.
    const scalar_exprs = .{ "1 + 2", "'hello'", "true()", "1 = 1" };
    inline for (scalar_exprs) |expr| {
        const nodes = try searchAll(allocator, @ptrFromInt(0x2000), expr, @ptrFromInt(0x1000));
        const expected_len: usize = 0;
        try testing.expectEqual(expected_len, nodes.len);
    }
}

View File

@@ -0,0 +1,957 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! XPath 1.0 expression parser.
//!
//! Recursive descent over a fully-tokenized stream, producing an
//! `ast.Expr` tree allocated on the caller's arena. The AST borrows
//! string/name slices from `input` and is valid for as long as the
//! arena and input outlive it.
const std = @import("std");
const Tokenizer = @import("Tokenizer.zig");
const ast = @import("ast.zig");
const Token = Tokenizer.Token;
const Allocator = std.mem.Allocator;
const Parser = @This();
/// Every error `parse` and the internal parsing routines can return.
pub const Error = error{
    /// An arena allocation (token list, AST node, step/argument list) failed.
    OutOfMemory,
    /// A required token was missing, or input remained after a complete
    /// expression.
    UnexpectedToken,
    /// A step was expected to carry a node test (`*` or a name) but the
    /// current token is neither.
    ExpectedNodeTest,
    /// A primary expression was expected but the current token cannot
    /// start one.
    ExpectedPrimaryExpr,
    /// Nesting (sub-expressions or chained unary minus) reached `max_depth`.
    MaxDepthExceeded,
};
/// Cap recursive descent to keep adversarial input (e.g. `(((((...)))))`,
/// `------5`) from blowing the stack. Real XPath expressions never come
/// close to this; browsers typically allow several hundred.
const max_depth: u16 = 64;

/// Allocator that every AST node and list built by the parser is drawn from.
arena: Allocator,
/// Token stream being parsed; `parse` always terminates it with `.eof`.
tokens: []const Token,
/// Index into `tokens` of the next token to consume.
pos: usize = 0,
/// Current nesting depth, checked against `max_depth`.
depth: u16 = 0,
/// Parse `input` as an XPath 1.0 expression and return the root of its AST.
///
/// The input is tokenized in full first (the token list always ends with
/// an `.eof` token), then parsed by recursive descent. Everything is
/// allocated on `arena`. Returns `error.UnexpectedToken` if tokens remain
/// after a complete expression, or any other `Error` raised while parsing.
pub fn parse(arena: Allocator, input: []const u8) Error!*ast.Expr {
    var token_list: std.ArrayList(Token) = .empty;
    // Capacity hint only: one slot per four input bytes (minimum 8), which
    // is usually enough to avoid regrowing the list. The list still grows
    // on demand if the input turns out to be denser than that.
    const capacity_hint = @max(8, input.len / 4);
    try token_list.ensureTotalCapacity(arena, capacity_hint);

    var tokenizer = Tokenizer{ .input = input };
    var tok = tokenizer.next();
    while (tok != .eof) : (tok = tokenizer.next()) {
        try token_list.append(arena, tok);
    }
    // Keep the terminating `.eof` so the cursor helpers always have a
    // token to look at.
    try token_list.append(arena, tok);

    var parser = Parser{ .arena = arena, .tokens = token_list.items };
    const expr = try parser.parseExpr();
    return if (parser.peek() == .eof) expr else error.UnexpectedToken;
}
// --- token cursor helpers ---
/// Return the token under the cursor without consuming it.
fn peek(self: *const Parser) Token {
    const cursor = self.pos;
    return self.tokens[cursor];
}
/// Return the token `offset` places past the cursor, or `.eof` when that
/// would run off the end of the token list.
fn lookahead(self: *const Parser, offset: usize) Token {
    const idx = self.pos + offset;
    return if (idx < self.tokens.len) self.tokens[idx] else .eof;
}
/// Consume the current token and return it; the cursor moves forward by one.
fn advance(self: *Parser) Token {
    defer self.pos += 1;
    return self.tokens[self.pos];
}
/// True when the current token has the given tag.
fn at(self: *const Parser, tag: std.meta.Tag(Token)) bool {
    const current: std.meta.Tag(Token) = self.peek();
    return current == tag;
}
/// Consume the current token if it has the given tag. Returns whether a
/// token was consumed.
fn match(self: *Parser, tag: std.meta.Tag(Token)) bool {
    if (!self.at(tag)) return false;
    _ = self.advance();
    return true;
}
/// Consume and return the current token, which must have the given tag;
/// otherwise fail with `error.UnexpectedToken` without consuming anything.
fn expect(self: *Parser, tag: std.meta.Tag(Token)) Error!Token {
    return if (self.at(tag)) self.advance() else error.UnexpectedToken;
}
/// Consume the current token if it is a `.name` whose text equals
/// `keyword` exactly. Returns whether a token was consumed.
fn matchKeyword(self: *Parser, keyword: []const u8) bool {
    switch (self.peek()) {
        .name => |word| if (!std.mem.eql(u8, word, keyword)) return false,
        else => return false,
    }
    _ = self.advance();
    return true;
}
/// Allocate an `ast.Expr` on the parser's arena, initialised to `value`.
fn makeExpr(self: *Parser, value: ast.Expr) Error!*ast.Expr {
    const slot = try self.arena.create(ast.Expr);
    slot.* = value;
    return slot;
}
/// Allocate a binary-operator node `left <op> right` on the parser's arena.
fn makeBinop(self: *Parser, op: ast.BinOpKind, left: *ast.Expr, right: *ast.Expr) Error!*ast.Expr {
    const value: ast.Expr = .{ .binop = .{ .op = op, .left = left, .right = right } };
    return self.makeExpr(value);
}
// --- operator-precedence chain ---
//
// Or → And → Equality → Relational → Additive → Mult → Unary → Union → Path
/// Entry point for a full expression (top level, parenthesised group,
/// predicate or function argument). Each nested call counts one level
/// towards `max_depth`; exceeding it yields `error.MaxDepthExceeded`.
fn parseExpr(self: *Parser) Error!*ast.Expr {
    self.depth += 1;
    defer self.depth -= 1;
    if (self.depth > max_depth) return error.MaxDepthExceeded;
    return self.parseOrExpr();
}
/// OrExpr: one or more AndExprs joined by the `or` keyword,
/// left-associative.
fn parseOrExpr(self: *Parser) Error!*ast.Expr {
    var left = try self.parseAndExpr();
    while (true) {
        if (!self.matchKeyword("or")) return left;
        const right = try self.parseAndExpr();
        left = try self.makeBinop(.or_, left, right);
    }
}
/// AndExpr: one or more EqualityExprs joined by the `and` keyword,
/// left-associative.
fn parseAndExpr(self: *Parser) Error!*ast.Expr {
    var left = try self.parseEqualityExpr();
    while (true) {
        if (!self.matchKeyword("and")) return left;
        const right = try self.parseEqualityExpr();
        left = try self.makeBinop(.and_, left, right);
    }
}
/// EqualityExpr: RelationalExprs joined by `=` / `!=`, left-associative.
fn parseEqualityExpr(self: *Parser) Error!*ast.Expr {
    var left = try self.parseRelationalExpr();
    while (true) {
        const op = equalityOp(self.peek()) orelse break;
        _ = self.advance();
        const right = try self.parseRelationalExpr();
        left = try self.makeBinop(op, left, right);
    }
    return left;
}
/// RelationalExpr: AdditiveExprs joined by `<`, `>`, `<=`, `>=`,
/// left-associative.
fn parseRelationalExpr(self: *Parser) Error!*ast.Expr {
    var left = try self.parseAdditiveExpr();
    while (true) {
        const op = relationalOp(self.peek()) orelse break;
        _ = self.advance();
        const right = try self.parseAdditiveExpr();
        left = try self.makeBinop(op, left, right);
    }
    return left;
}
/// AdditiveExpr: MultiplicativeExprs joined by `+` / `-`, left-associative.
fn parseAdditiveExpr(self: *Parser) Error!*ast.Expr {
    var left = try self.parseMultExpr();
    while (true) {
        const op = additiveOp(self.peek()) orelse break;
        _ = self.advance();
        const right = try self.parseMultExpr();
        left = try self.makeBinop(op, left, right);
    }
    return left;
}
/// MultiplicativeExpr: UnaryExprs joined by `*`, `div`, `mod`,
/// left-associative.
///
/// After a complete unary expression, `*` is multiply; `div`/`mod` are
/// operator-position keywords (tokenized as Name).
fn parseMultExpr(self: *Parser) Error!*ast.Expr {
    var left = try self.parseUnaryExpr();
    while (true) {
        const op = multOp(self.peek()) orelse break;
        _ = self.advance();
        const right = try self.parseUnaryExpr();
        left = try self.makeBinop(op, left, right);
    }
    return left;
}
/// UnaryExpr: any number of leading `-` signs applied to a UnionExpr.
/// Each `-` recurses once and counts towards `max_depth`, so a long run
/// of minus signs fails with `error.MaxDepthExceeded` instead of
/// exhausting the stack.
fn parseUnaryExpr(self: *Parser) Error!*ast.Expr {
    if (!self.match(.minus)) return self.parseUnionExpr();

    if (self.depth >= max_depth) return error.MaxDepthExceeded;
    self.depth += 1;
    defer self.depth -= 1;

    const operand = try self.parseUnaryExpr();
    return self.makeExpr(.{ .neg = operand });
}
/// UnionExpr: PathExprs joined by `|`, left-associative.
fn parseUnionExpr(self: *Parser) Error!*ast.Expr {
    var left = try self.parsePathExpr();
    while (true) {
        if (!self.match(.pipe)) return left;
        const right = try self.parsePathExpr();
        left = try self.makeBinop(.union_, left, right);
    }
}
// --- path expressions ---
/// PathExpr: an absolute path, a relative path, or a filter expression
/// (primary expression plus predicates) optionally continued by `/` or
/// `//` and further steps.
fn parsePathExpr(self: *Parser) Error!*ast.Expr {
    const t = self.peek();
    switch (t) {
        .slash, .double_slash => return self.parseAbsPath(),
        else => {},
    }

    // Filter-vs-relative-path disambiguation: a primary expression
    // starts with `(`, string, number, `$`, or a `name(` where the
    // name is *not* a node-type test
    // (`node`/`text`/`comment`/`processing-instruction`).
    const is_filter = switch (t) {
        .lparen, .string, .number, .dollar => true,
        .name => |name| self.lookahead(1) == .lparen and !isNodeTypeName(name),
        else => false,
    };
    if (!is_filter) return self.parseRelPath();

    // Primary expression followed by any number of `[predicate]`s.
    var primary = try self.parsePrimaryExpr();
    while (self.match(.lbracket)) {
        const pred = try self.parseExpr();
        _ = try self.expect(.rbracket);
        primary = try self.makeExpr(.{ .filter = .{ .expr = primary, .predicate = pred } });
    }

    // No path continuation: the filter expression stands alone.
    const sep = self.peek();
    if (sep != .slash and sep != .double_slash) return primary;
    _ = self.advance();

    var steps: std.ArrayList(ast.Step) = .empty;
    // `//` abbreviates `/descendant-or-self::node()/`.
    if (sep == .double_slash) try steps.append(self.arena, descendantOrSelfStep());
    try self.parseRelStepsInto(&steps);
    return self.makeExpr(.{ .filter_path = .{
        .filter = primary,
        .steps = steps.items,
    } });
}
/// Absolute location path: `/`, `/steps…` or `//steps…`. A leading `//`
/// contributes a `descendant-or-self::node()` step and must be followed
/// by at least one step.
fn parseAbsPath(self: *Parser) Error!*ast.Expr {
    var steps: std.ArrayList(ast.Step) = .empty;

    const from_descendants = self.match(.double_slash);
    if (from_descendants) {
        try steps.append(self.arena, descendantOrSelfStep());
    } else {
        _ = try self.expect(.slash);
    }

    // `/` alone is the document root — no step required.
    if (from_descendants or self.canStartStep()) {
        try self.parseRelStepsInto(&steps);
    }

    return self.makeExpr(.{ .path = .{
        .absolute = true,
        .steps = steps.items,
    } });
}
/// Relative location path: one or more steps separated by `/` or `//`.
fn parseRelPath(self: *Parser) Error!*ast.Expr {
    var steps: std.ArrayList(ast.Step) = .empty;
    try self.parseRelStepsInto(&steps);

    const value: ast.Expr = .{ .path = .{
        .absolute = false,
        .steps = steps.items,
    } };
    return self.makeExpr(value);
}
/// Parse one step, then any further steps introduced by `/` or `//`,
/// appending them to `steps`. Each `//` separator additionally inserts a
/// `descendant-or-self::node()` step before the step that follows it.
fn parseRelStepsInto(self: *Parser, steps: *std.ArrayList(ast.Step)) Error!void {
    try steps.append(self.arena, try self.parseStep());
    while (true) {
        const sep = self.peek();
        if (sep != .slash and sep != .double_slash) return;
        _ = self.advance();
        if (sep == .double_slash) {
            try steps.append(self.arena, descendantOrSelfStep());
        }
        try steps.append(self.arena, try self.parseStep());
    }
}
/// True when the current token can begin a location step: a name, `*`,
/// `.`, `..` or `@`.
fn canStartStep(self: *const Parser) bool {
    const tag: std.meta.Tag(Token) = self.peek();
    return tag == .name or
        tag == .star or
        tag == .dot or
        tag == .double_dot or
        tag == .at;
}
/// Parse a single location step: an abbreviated step (`.` / `..`), or an
/// optional axis (`@` or `name::`, defaulting to `child`), a node test,
/// and zero or more `[predicate]`s.
fn parseStep(self: *Parser) Error!ast.Step {
    // Abbreviated steps `.` and `..` carry no axis, node-test, or
    // predicates — predicates after `.` are a parse error per polyfill.
    if (self.match(.dot)) return abbreviatedStep(.self);
    if (self.match(.double_dot)) return abbreviatedStep(.parent);

    const axis: ast.Axis = blk: {
        if (self.match(.at)) break :blk .attribute;
        if (self.peek() == .name and self.lookahead(1) == .double_colon) {
            const axis_name = self.advance().name;
            _ = self.advance(); // `::`
            break :blk parseAxisName(axis_name);
        }
        break :blk .child;
    };

    const node_test = try self.parseNodeTest();

    var preds: std.ArrayList(*ast.Expr) = .empty;
    while (true) {
        if (!self.match(.lbracket)) break;
        const pred = try self.parseExpr();
        _ = try self.expect(.rbracket);
        try preds.append(self.arena, pred);
    }

    return .{ .axis = axis, .node_test = node_test, .predicates = preds.items };
}
/// Parse a node test: `*`, a node-type test such as `text()`, or a plain
/// name. A node-type name that is not followed by `(` is treated as an
/// ordinary name test. Fails with `error.ExpectedNodeTest` when the
/// current token is neither `*` nor a name.
fn parseNodeTest(self: *Parser) Error!ast.NodeTest {
    if (self.match(.star)) return .{ .name = "*" };

    const name = switch (self.peek()) {
        .name => |n| n,
        else => return error.ExpectedNodeTest,
    };

    const kind = typeTestKind(name);
    if (kind != null and self.lookahead(1) == .lparen) {
        _ = self.advance(); // name
        _ = self.advance(); // `(`
        // `processing-instruction("target")` consumes the literal but
        // ignores it (decision #3 stub).
        if (kind.? == .processing_instruction and self.peek() == .string) {
            _ = self.advance();
        }
        _ = try self.expect(.rparen);
        return .{ .type_test = kind.? };
    }

    _ = self.advance();
    return .{ .name = name };
}
/// Parse a primary expression: string literal, number, `$variable`,
/// parenthesised expression, or function call. Fails with
/// `error.ExpectedPrimaryExpr` when the current token starts none of them.
fn parsePrimaryExpr(self: *Parser) Error!*ast.Expr {
    const tok = self.peek();

    // Every valid alternative begins by consuming its first token.
    switch (tok) {
        .string, .number, .dollar, .lparen, .name => _ = self.advance(),
        else => return error.ExpectedPrimaryExpr,
    }

    switch (tok) {
        .string => |s| return self.makeExpr(.{ .literal = s }),
        .number => |n| return self.makeExpr(.{ .number = n }),
        .dollar => {
            // The stored variable name excludes the `$`.
            const name_tok = try self.expect(.name);
            return self.makeExpr(.{ .var_ref = name_tok.name });
        },
        .lparen => {
            // Parentheses only group: the inner expression is returned as-is.
            const e = try self.parseExpr();
            _ = try self.expect(.rparen);
            return e;
        },
        .name => |name| {
            _ = try self.expect(.lparen);
            var args: std.ArrayList(*ast.Expr) = .empty;
            if (!self.match(.rparen)) {
                while (true) {
                    try args.append(self.arena, try self.parseExpr());
                    if (!self.match(.comma)) break;
                }
                _ = try self.expect(.rparen);
            }
            return self.makeExpr(.{ .fn_call = .{ .name = name, .args = args.items } });
        },
        else => unreachable, // rejected by the first switch
    }
}
// --- pure helpers ---
/// Map an `=` / `!=` token to its operator kind; null for any other token.
fn equalityOp(t: Token) ?ast.BinOpKind {
    if (t == .eq) return .eq;
    if (t == .neq) return .neq;
    return null;
}
/// Map a `<`, `>`, `<=`, `>=` token to its operator kind; null for any
/// other token.
fn relationalOp(t: Token) ?ast.BinOpKind {
    if (t == .lt) return .lt;
    if (t == .gt) return .gt;
    if (t == .lte) return .lte;
    if (t == .gte) return .gte;
    return null;
}
/// Map a `+` / `-` token to its operator kind; null for any other token.
fn additiveOp(t: Token) ?ast.BinOpKind {
    if (t == .plus) return .add;
    if (t == .minus) return .sub;
    return null;
}
/// Map a token in operator position to a multiplicative operator: `*`,
/// or a name token spelled exactly `div` or `mod`. Null for anything else.
fn multOp(t: Token) ?ast.BinOpKind {
    switch (t) {
        .star => return .mul,
        .name => |name| {
            if (std.mem.eql(u8, name, "div")) return .div;
            if (std.mem.eql(u8, name, "mod")) return .mod;
            return null;
        },
        else => return null,
    }
}
/// The step that `//` abbreviates: `descendant-or-self::node()` with no
/// predicates.
fn descendantOrSelfStep() ast.Step {
    return abbreviatedStep(.descendant_or_self);
}
/// Build a predicate-free `axis::node()` step, as used for the `.` and
/// `..` abbreviations.
fn abbreviatedStep(axis: ast.Axis) ast.Step {
    const any_node: ast.NodeTest = .{ .type_test = .node };
    return .{ .axis = axis, .node_test = any_node, .predicates = &.{} };
}
/// True when `name` is one of the node-type test names (`node`, `text`,
/// `comment`, `processing-instruction`).
fn isNodeTypeName(name: []const u8) bool {
    return type_test_lookup.has(name);
}
/// Node-type test names recognised in `name()` position, keyed by their
/// exact (case-sensitive) spelling.
const type_test_lookup = std.StaticStringMap(ast.TypeTest).initComptime(.{
    .{ "comment", .comment },
    .{ "node", .node },
    .{ "processing-instruction", .processing_instruction },
    .{ "text", .text },
});

/// Look up the node-type test that `name` spells, or null when `name` is
/// not a node-type test name.
fn typeTestKind(name: []const u8) ?ast.TypeTest {
    const kind = type_test_lookup.get(name);
    return kind;
}
/// The XPath 1.0 axis names, keyed by their exact spelling.
const axis_lookup = std.StaticStringMap(ast.Axis).initComptime(.{
    // Forward axes.
    .{ "self", .self },
    .{ "child", .child },
    .{ "descendant", .descendant },
    .{ "descendant-or-self", .descendant_or_self },
    .{ "following", .following },
    .{ "following-sibling", .following_sibling },
    .{ "attribute", .attribute },
    .{ "namespace", .namespace },
    // Reverse axes.
    .{ "parent", .parent },
    .{ "ancestor", .ancestor },
    .{ "ancestor-or-self", .ancestor_or_self },
    .{ "preceding", .preceding },
    .{ "preceding-sibling", .preceding_sibling },
});

/// Translate the text before `::` into an axis. Unrecognised names map to
/// `.unknown` rather than raising a parse error.
fn parseAxisName(name: []const u8) ast.Axis {
    if (axis_lookup.get(name)) |axis| return axis;
    return .unknown;
}
// ---------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------
const testing = std.testing;
/// Test helper: parse `input` on a fresh arena backed by
/// `testing.allocator`. On success the caller owns the returned arena and
/// must `deinit` it; on failure the arena is released here.
fn parseFixture(input: []const u8) !struct { arena: std.heap.ArenaAllocator, expr: *ast.Expr } {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    const expr = parse(arena_state.allocator(), input) catch |err| {
        arena_state.deinit();
        return err;
    };
    return .{ .arena = arena_state, .expr = expr };
}
test "XPath.Parser: number literal" {
    var fx = try parseFixture("42");
    defer fx.arena.deinit();

    // A bare number parses straight to a `.number` node.
    const expected: f64 = 42;
    try testing.expectEqual(expected, fx.expr.number);
}
test "XPath.Parser: string literal" {
    var fx = try parseFixture("'hello'");
    defer fx.arena.deinit();

    // The stored literal excludes the surrounding quotes.
    const literal = fx.expr.literal;
    try testing.expectEqualStrings("hello", literal);
}
test "XPath.Parser: variable reference strips $" {
    var fx = try parseFixture("$x");
    defer fx.arena.deinit();

    // Only the name after `$` is kept.
    const var_name = fx.expr.var_ref;
    try testing.expectEqualStrings("x", var_name);
}
test "XPath.Parser: parenthesized expression unwraps" {
    var fx = try parseFixture("(42)");
    defer fx.arena.deinit();

    // Parentheses leave no wrapper node behind: the root is the number.
    const expected: f64 = 42;
    try testing.expectEqual(expected, fx.expr.number);
}
test "XPath.Parser: function call with no args" {
    var fx = try parseFixture("position()");
    defer fx.arena.deinit();

    const fc = fx.expr.fn_call;
    try testing.expectEqualStrings("position", fc.name);
    const expected_argc: usize = 0;
    try testing.expectEqual(expected_argc, fc.args.len);
}
test "XPath.Parser: function call with args" {
    var fx = try parseFixture("substring('abc', 2, 1)");
    defer fx.arena.deinit();

    const fc = fx.expr.fn_call;
    try testing.expectEqualStrings("substring", fc.name);

    // Arguments keep their source order and literal kinds.
    const args = fc.args;
    try testing.expectEqual(@as(usize, 3), args.len);
    try testing.expectEqualStrings("abc", args[0].literal);
    try testing.expectEqual(@as(f64, 2), args[1].number);
    try testing.expectEqual(@as(f64, 1), args[2].number);
}
test "XPath.Parser: arithmetic precedence — mul binds tighter than add" {
    var fx = try parseFixture("1 + 2 * 3");
    defer fx.arena.deinit();

    // Expected AST: add(1, mul(2, 3))
    const top = fx.expr.binop;
    const mul = top.right.binop;

    try testing.expectEqual(ast.BinOpKind.add, top.op);
    try testing.expectEqual(ast.BinOpKind.mul, mul.op);

    try testing.expectEqual(@as(f64, 1), top.left.number);
    try testing.expectEqual(@as(f64, 2), mul.left.number);
    try testing.expectEqual(@as(f64, 3), mul.right.number);
}
test "XPath.Parser: arithmetic left-associativity" {
    var fx = try parseFixture("1 - 2 - 3");
    defer fx.arena.deinit();

    // Expected AST: sub(sub(1, 2), 3)
    const top = fx.expr.binop;
    const inner = top.left.binop;

    try testing.expectEqual(ast.BinOpKind.sub, top.op);
    try testing.expectEqual(ast.BinOpKind.sub, inner.op);

    try testing.expectEqual(@as(f64, 1), inner.left.number);
    try testing.expectEqual(@as(f64, 2), inner.right.number);
    try testing.expectEqual(@as(f64, 3), top.right.number);
}
test "XPath.Parser: div and mod are operator-position keywords" {
    // Each row: expression source, operator expected at the AST root.
    inline for (.{
        .{ "7 div 2", ast.BinOpKind.div },
        .{ "7 mod 2", ast.BinOpKind.mod },
    }) |case| {
        var fx = try parseFixture(case[0]);
        defer fx.arena.deinit();
        try testing.expectEqual(case[1], fx.expr.binop.op);
    }
}
test "XPath.Parser: comparison operators" {
    // Each row: expression source, operator expected at the AST root.
    const cases = .{
        .{ "1 = 2", ast.BinOpKind.eq },
        .{ "1 != 2", ast.BinOpKind.neq },
        .{ "1 < 2", ast.BinOpKind.lt },
        .{ "1 <= 2", ast.BinOpKind.lte },
        .{ "1 > 2", ast.BinOpKind.gt },
        .{ "1 >= 2", ast.BinOpKind.gte },
    };
    inline for (cases) |case| {
        var fx = try parseFixture(case[0]);
        defer fx.arena.deinit();
        const top = fx.expr.binop;
        try testing.expectEqual(case[1], top.op);
    }
}
test "XPath.Parser: logical or/and short-circuit chain" {
    var fixture = try parseFixture("a or b and c");
    defer fixture.arena.deinit();

    // `and` binds tighter than `or`: or(path(a), and(path(b), path(c))).
    const disjunction = fixture.expr.binop;
    try testing.expectEqual(ast.BinOpKind.or_, disjunction.op);

    const conjunction = disjunction.right.binop;
    try testing.expectEqual(ast.BinOpKind.and_, conjunction.op);
}
test "XPath.Parser: unary minus" {
    var fixture = try parseFixture("-1");
    defer fixture.arena.deinit();

    // `neg` wraps the operand; the operand itself keeps its positive value.
    const operand = fixture.expr.neg;
    try testing.expectEqual(@as(f64, 1), operand.number);
}
test "XPath.Parser: union" {
    var fixture = try parseFixture("a | b");
    defer fixture.arena.deinit();

    // `|` is represented as a binop of kind `union_`.
    const op = fixture.expr.binop.op;
    try testing.expectEqual(ast.BinOpKind.union_, op);
}
test "XPath.Parser: absolute path / alone is document root" {
    var fixture = try parseFixture("/");
    defer fixture.arena.deinit();

    // A bare `/` is an absolute path that carries no steps at all.
    const root = fixture.expr.path;
    try testing.expect(root.absolute);
    try testing.expectEqual(@as(usize, 0), root.steps.len);
}
test "XPath.Parser: absolute path /foo" {
    var fixture = try parseFixture("/foo");
    defer fixture.arena.deinit();

    // One leading slash, one name step.
    const located = fixture.expr.path;
    try testing.expect(located.absolute);

    const steps = located.steps;
    try testing.expectEqual(@as(usize, 1), steps.len);
    try testing.expectEqualStrings("foo", steps[0].node_test.name);
}
test "XPath.Parser: //foo expands to descendant-or-self::node()/foo" {
    var fixture = try parseFixture("//foo");
    defer fixture.arena.deinit();

    const located = fixture.expr.path;
    try testing.expect(located.absolute);

    // `//` contributes a synthetic descendant-or-self::node() step ahead
    // of the written name step.
    const steps = located.steps;
    try testing.expectEqual(@as(usize, 2), steps.len);

    const synthetic = steps[0];
    try testing.expectEqual(ast.Axis.descendant_or_self, synthetic.axis);
    try testing.expectEqual(ast.TypeTest.node, synthetic.node_test.type_test);

    try testing.expectEqualStrings("foo", steps[1].node_test.name);
}
test "XPath.Parser: relative path child::foo/bar" {
    var fixture = try parseFixture("foo/bar");
    defer fixture.arena.deinit();

    // No leading slash means a relative path; unqualified steps default
    // to the child axis.
    const located = fixture.expr.path;
    try testing.expect(!located.absolute);

    const steps = located.steps;
    try testing.expectEqual(@as(usize, 2), steps.len);
    try testing.expectEqual(ast.Axis.child, steps[0].axis);
    try testing.expectEqualStrings("foo", steps[0].node_test.name);
    try testing.expectEqualStrings("bar", steps[1].node_test.name);
}
test "XPath.Parser: abbreviated steps . and .." {
    var fixture = try parseFixture("./..");
    defer fixture.arena.deinit();

    // `.` maps to the self axis and `..` to the parent axis.
    const steps = fixture.expr.path.steps;
    try testing.expectEqual(@as(usize, 2), steps.len);
    try testing.expectEqual(ast.Axis.self, steps[0].axis);
    try testing.expectEqual(ast.Axis.parent, steps[1].axis);
}
test "XPath.Parser: attribute axis @class" {
    var fixture = try parseFixture("@class");
    defer fixture.arena.deinit();

    // `@name` is shorthand for a step on the attribute axis.
    const first = fixture.expr.path.steps[0];
    try testing.expectEqual(ast.Axis.attribute, first.axis);
    try testing.expectEqualStrings("class", first.node_test.name);
}
test "XPath.Parser: all 13 named axes parse correctly" {
    // XPath 1.0 defines thirteen axes. The explicit `attribute::` form is
    // listed here as well, so it is asserted directly rather than only
    // through the `@` abbreviation.
    inline for (.{
        .{ "child::a", ast.Axis.child },
        .{ "descendant::a", ast.Axis.descendant },
        .{ "descendant-or-self::a", ast.Axis.descendant_or_self },
        .{ "self::a", ast.Axis.self },
        .{ "parent::a", ast.Axis.parent },
        .{ "ancestor::a", ast.Axis.ancestor },
        .{ "ancestor-or-self::a", ast.Axis.ancestor_or_self },
        .{ "following-sibling::a", ast.Axis.following_sibling },
        .{ "preceding-sibling::a", ast.Axis.preceding_sibling },
        .{ "following::a", ast.Axis.following },
        .{ "preceding::a", ast.Axis.preceding },
        .{ "attribute::a", ast.Axis.attribute },
        .{ "namespace::a", ast.Axis.namespace },
    }) |case| {
        var fx = try parseFixture(case[0]);
        defer fx.arena.deinit();
        // The axis of the first (only) step must match the written axis name.
        try testing.expectEqual(case[1], fx.expr.path.steps[0].axis);
    }
}
test "XPath.Parser: unknown axis name maps to .unknown — polyfill parity" {
    var fixture = try parseFixture("wibble::a");
    defer fixture.arena.deinit();

    // An unrecognised axis name is not a parse error; it becomes `.unknown`.
    const axis = fixture.expr.path.steps[0].axis;
    try testing.expectEqual(ast.Axis.unknown, axis);
}
test "XPath.Parser: wildcard *" {
    var fixture = try parseFixture("*");
    defer fixture.arena.deinit();

    // The wildcard is stored as the literal name "*".
    const node_test = fixture.expr.path.steps[0].node_test;
    try testing.expectEqualStrings("*", node_test.name);
}
test "XPath.Parser: namespace-prefixed name and wildcard" {
    // Prefixed names, including `prefix:*`, are kept verbatim in the node test.
    inline for (.{ "svg:rect", "svg:*" }) |qname| {
        var fixture = try parseFixture(qname);
        defer fixture.arena.deinit();
        try testing.expectEqualStrings(qname, fixture.expr.path.steps[0].node_test.name);
    }
}
test "XPath.Parser: node-type tests" {
    // Each node-type test paired with the TypeTest variant it must yield.
    const cases = .{
        .{ "node()", ast.TypeTest.node },
        .{ "text()", ast.TypeTest.text },
        .{ "comment()", ast.TypeTest.comment },
        .{ "processing-instruction()", ast.TypeTest.processing_instruction },
    };
    inline for (cases) |case| {
        const source = case[0];
        const wanted = case[1];
        var fixture = try parseFixture(source);
        defer fixture.arena.deinit();
        try testing.expectEqual(wanted, fixture.expr.path.steps[0].node_test.type_test);
    }
}
test "XPath.Parser: processing-instruction with literal target — consumed but ignored" {
    var fixture = try parseFixture("processing-instruction('xml-stylesheet')");
    defer fixture.arena.deinit();

    // The target literal must not prevent the step from parsing as a
    // plain processing-instruction type test.
    const node_test = fixture.expr.path.steps[0].node_test;
    try testing.expectEqual(ast.TypeTest.processing_instruction, node_test.type_test);
}
test "XPath.Parser: predicate on step" {
    var fixture = try parseFixture("p[1]");
    defer fixture.arena.deinit();

    // The bracketed expression attaches to the step as its sole predicate.
    const predicates = fixture.expr.path.steps[0].predicates;
    try testing.expectEqual(@as(usize, 1), predicates.len);
    try testing.expectEqual(@as(f64, 1), predicates[0].number);
}
test "XPath.Parser: multi-predicate step" {
    var fixture = try parseFixture("p[1][@x]");
    defer fixture.arena.deinit();

    // Consecutive predicates accumulate on the same step.
    const predicates = fixture.expr.path.steps[0].predicates;
    try testing.expectEqual(@as(usize, 2), predicates.len);
}
test "XPath.Parser: filter expression with predicate parses as Filter, not Step" {
    var fixture = try parseFixture("(//a)[1]");
    defer fixture.arena.deinit();

    // The root is a Filter: a parenthesized absolute path plus one predicate.
    const filter_node = fixture.expr.filter;
    try testing.expectEqual(@as(f64, 1), filter_node.predicate.number);

    const wrapped = filter_node.expr.path;
    try testing.expect(wrapped.absolute);
}
test "XPath.Parser: filter with multi-predicate nests" {
    var fixture = try parseFixture("(//a)[1][2]");
    defer fixture.arena.deinit();

    // Each extra predicate wraps the previous filter: the last one written
    // is outermost.
    const last_written = fixture.expr.filter;
    try testing.expectEqual(@as(f64, 2), last_written.predicate.number);

    const first_written = last_written.expr.filter;
    try testing.expectEqual(@as(f64, 1), first_written.predicate.number);
}
test "XPath.Parser: filter with location-path tail (filter_path)" {
    var fixture = try parseFixture("(//a)/b");
    defer fixture.arena.deinit();

    // The parenthesized part becomes the filter; `/b` becomes the tail steps.
    const node = fixture.expr.filter_path;
    try testing.expect(node.filter.path.absolute);

    const tail = node.steps;
    try testing.expectEqual(@as(usize, 1), tail.len);
    try testing.expectEqualStrings("b", tail[0].node_test.name);
}
test "XPath.Parser: filter with // tail prepends descendant-or-self" {
    var fixture = try parseFixture("(//a)//b");
    defer fixture.arena.deinit();

    // A `//` after the filter adds a descendant-or-self step before `b`.
    const tail = fixture.expr.filter_path.steps;
    try testing.expectEqual(@as(usize, 2), tail.len);
    try testing.expectEqual(ast.Axis.descendant_or_self, tail[0].axis);
    try testing.expectEqualStrings("b", tail[1].node_test.name);
}
test "XPath.Parser: function call followed by predicate" {
    var fixture = try parseFixture("id('x')[1]");
    defer fixture.arena.deinit();

    // A predicate after a call produces a Filter wrapping the fn_call.
    const filter_node = fixture.expr.filter;
    try testing.expectEqual(@as(f64, 1), filter_node.predicate.number);

    const callee = filter_node.expr.fn_call;
    try testing.expectEqualStrings("id", callee.name);
}
test "XPath.Parser: complex representative expression" {
    var fixture = try parseFixture("//div[@class='active']/p[position()<=last()-1]");
    defer fixture.arena.deinit();

    const located = fixture.expr.path;
    try testing.expect(located.absolute);

    // Three steps: the synthetic `//` step, then `div[...]`, then `p[...]`.
    const steps = located.steps;
    try testing.expectEqual(@as(usize, 3), steps.len);
    try testing.expectEqual(ast.Axis.descendant_or_self, steps[0].axis);

    const div_step = steps[1];
    try testing.expectEqualStrings("div", div_step.node_test.name);
    try testing.expectEqual(@as(usize, 1), div_step.predicates.len);

    const p_step = steps[2];
    try testing.expectEqualStrings("p", p_step.node_test.name);
    try testing.expectEqual(@as(usize, 1), p_step.predicates.len);
}
/// Test helper: parses `input` into a throwaway arena and asserts that the
/// parse fails with exactly `expected`.
fn expectParseError(input: []const u8, expected: anyerror) !void {
    var scratch = std.heap.ArenaAllocator.init(testing.allocator);
    defer scratch.deinit();

    const outcome = parse(scratch.allocator(), input);
    try testing.expectError(expected, outcome);
}
test "XPath.Parser: error on unbalanced paren" {
    // The opening paren is never closed.
    const input = "(1";
    try expectParseError(input, error.UnexpectedToken);
}
test "XPath.Parser: error on unbalanced bracket" {
    // The predicate bracket is never closed.
    const input = "p[1";
    try expectParseError(input, error.UnexpectedToken);
}
test "XPath.Parser: error on missing node test" {
    // An axis specifier with nothing after the `::`.
    const input = "child::";
    try expectParseError(input, error.ExpectedNodeTest);
}
test "XPath.Parser: bare `+` falls through to step and reports missing node test" {
    // Polyfill parity: `+` starts neither a path nor a primary expression,
    // so parsing lands in parseStep, which finds no name for the node test.
    const input = "+";
    try expectParseError(input, error.ExpectedNodeTest);
}
test "XPath.Parser: error on trailing tokens" {
    // A complete expression followed by a leftover token.
    const input = "1 2";
    try expectParseError(input, error.UnexpectedToken);
}
test "XPath.Parser: empty string falls through to step and reports missing node test" {
    // With no tokens at all, the failure is the missing node test.
    const input = "";
    try expectParseError(input, error.ExpectedNodeTest);
}
test "XPath.Parser: 91-case battery — every expression parses" {
    // Parse-only smoke battery: 91 XPath 1.0 expressions spanning the
    // expression shapes accepted through the public API. Nothing is
    // asserted about the resulting AST; each entry only has to parse.
    const battery = [_][]const u8{
        "/html",
        "/html/body",
        "/",
        "//h1",
        "//ul/li",
        "//ul//li",
        ".",
        ".//li",
        "//section/*",
        "//*[@id='heading']",
        "//li[1]/following-sibling::li",
        "//li[5]/preceding-sibling::li",
        "//li/parent::ul",
        "//li/ancestor::body",
        "//li/ancestor-or-self::body",
        "//li[3]/preceding::li",
        "//li[1]/following::li",
        "//ul/descendant::li",
        "//ul/descendant-or-self::li",
        "//section[1]/child::span",
        "//*[@id='heading']/self::h1",
        "//a[1]/attribute::href",
        "//a[1]/@*",
        "//li[1]",
        "//li[last()]",
        "//li[last() - 1]",
        "//li[position() = 1]",
        "//li[position() > 2]",
        "//li[position() mod 2 = 1]",
        "(//li)[1]",
        "(//section)[2]",
        "//li[3]/preceding-sibling::li[1]",
        "//li[5]/ancestor::*[1]",
        "//li[contains(concat(' ', @class, ' '), ' even ')][2]",
        "//*[@id='heading' and @class='primary']",
        "//*[@id='heading' or @id='p1']",
        "//section[a]",
        "//section[count(span) = 2]",
        "//ul[count(li) = 5]",
        "//tr[td[1]]",
        "//tr[td/text() = 'Bob']",
        "//*[starts-with(@id, 'link')]",
        "//*[normalize-space() = 'Hello World']",
        "//*[normalize-space(.) = 'Item 1']",
        "//*[concat(@id, '-x') = 'heading-x']",
        "//*[substring(@id, 1, 1) = 'p']",
        "//*[substring(@id, 2, 1) = '1' and starts-with(@id, 'p')]",
        "//p[translate(@id, 'p', 'q') = 'q1']",
        "//*[substring-before(@id, '1') = 'p']",
        "//*[substring-after(@id, 'lin') = 'k1']",
        "//tr[number(td[2]) > 28]",
        "//tr[floor(number(td[2]) div 10) = 3]",
        "//tr[ceiling(number(td[2]) div 10) = 3]",
        "//tr[round(number(td[2]) div 10) = 3]",
        "//ul[sum(li/@data-len) = 0]",
        "//p[boolean(@lang)]",
        "//*[false()]",
        "//*[name() = 'h1']",
        "//*[local-name() = 'h1']",
        "id('heading')",
        "id('heading p1')",
        "id(//em/parent::p/@id)",
        "//h1 | //title",
        "//h1 | //*[@id='p1']",
        "//*[@id='heading'] | //*[@id='heading']",
        "//li[position() + 1 = 3]",
        "//li[position() - 1 = 0]",
        "//li[position() * 2 = 4]",
        "//li[position() div 2 = 1]",
        "//li[(position() mod 2) = 0]",
        "//tr[number(td[2]) = 30]",
        "//tr[number(td[2]) != 30]",
        "//tr[number(td[2]) < 30]",
        "//tr[number(td[2]) <= 30]",
        "//tr[number(td[2]) > 30]",
        "//tr[number(td[2]) >= 30]",
        "//tr[td[2] = 30]",
        "//tr[td[2] = '30']",
        "//comment()",
        ".//a[contains(normalize-space(string(.)), 'Click me')]",
        ".//input[(./@type = 'text')]",
        ".//*[@id='heading']",
        ".//li[contains(concat(' ', @class, ' '), ' even ')]",
        "//*[@id='heading']/text()",
        "//em/parent::p",
        "//p[em]",
        "//p[not(em)]",
        "//section[a/@href = '/foo']",
        "//ul/li[last()][position() = last()]",
        "//ul[string(count(li)) = '5']",
        "//body[count(//*[contains(@class, 'item')]) = 5]",
    };
    // Guards against an entry being dropped or duplicated by accident.
    try testing.expectEqual(@as(usize, 91), battery.len);

    for (battery) |source| {
        // Fresh arena per expression so one parse cannot lean on another.
        var arena = std.heap.ArenaAllocator.init(testing.allocator);
        defer arena.deinit();

        if (parse(arena.allocator(), source)) |_| {
            // Parsed fine; the AST itself is not inspected here.
        } else |err| {
            // Name the offending expression before propagating the error.
            std.debug.print("\n failed to parse: {s}\n error: {s}\n", .{ source, @errorName(err) });
            return err;
        }
    }
}
test "XPath.Parser: deep parenthesization rejected past max_depth" {
    // One nesting level beyond the limit.
    const depth = max_depth + 1;

    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();

    // Build "(((...1...)))" with `depth` parentheses on each side.
    var text: std.ArrayList(u8) = .empty;
    defer text.deinit(testing.allocator);
    try text.appendNTimes(testing.allocator, '(', depth);
    try text.append(testing.allocator, '1');
    try text.appendNTimes(testing.allocator, ')', depth);

    const outcome = parse(arena.allocator(), text.items);
    try testing.expectError(error.MaxDepthExceeded, outcome);
}
test "XPath.Parser: deep unary minus rejected past max_depth" {
    // One minus sign beyond the limit.
    const depth = max_depth + 1;

    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();

    // Build "---...-1" with `depth` leading minus signs.
    var text: std.ArrayList(u8) = .empty;
    defer text.deinit(testing.allocator);
    try text.appendNTimes(testing.allocator, '-', depth);
    try text.append(testing.allocator, '1');

    const outcome = parse(arena.allocator(), text.items);
    try testing.expectError(error.MaxDepthExceeded, outcome);
}

View File

@@ -0,0 +1,464 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! XPath 1.0 expression tokenizer.
//!
//! HTML-pragmatic behavior: lenient whitespace, case-preserving names,
//! no escape processing in string literals (use the other quote type
//! to embed), unknown characters silently skipped.
//!
//! The tokenizer borrows from the input slice and never allocates.
//! `next()` always returns a token; `.eof` is terminal and idempotent.
const std = @import("std");
const Tokenizer = @This();
/// One lexical token as returned by `next()`. Only `string`, `number` and
/// `name` carry a payload; every other variant is a bare tag.
pub const Token = union(enum) {
/// String literal: `'foo'` or `"foo"`. Quotes are stripped; escapes
/// are not interpreted (the polyfill takes the raw substring).
string: []const u8,
/// Numeric literal: `123`, `1.5`, `.5`, `5.`. f64 matches the
/// runtime number type.
number: f64,
/// Bare identifier — element/function/axis name, an `or`/`and`/
/// `div`/`mod` keyword, or a namespace-prefixed name (`prefix:local`,
/// `prefix:*`). The colon and optional wildcard are preserved
/// verbatim so the parser can split.
name: []const u8,
// Punctuation and operators. Two-character forms (`//`, `..`, `!=`,
// `<=`, `>=`, `::`) are matched before their one-character prefixes.
slash, // `/`
double_slash, // `//`
dot, // `.`
double_dot, // `..`
at, // `@`
lparen, // `(`
rparen, // `)`
lbracket, // `[`
rbracket, // `]`
comma, // `,`
pipe, // `|`
eq, // `=`
neq, // `!=`
lt, // `<`
lte, // `<=`
gt, // `>`
gte, // `>=`
plus, // `+`
minus, // `-`
star, // `*`
dollar, // `$`
double_colon, // `::`
// End of input; `next()` keeps returning this once reached.
eof,
};
/// Expression text being tokenized. `string` and `name` token payloads are
/// slices into this buffer.
input: []const u8,
/// Byte index into `input` of the next unread byte.
position: usize = 0,
/// True once every byte of `input` has been consumed.
fn isEof(self: *const Tokenizer) bool {
    const remaining_from = self.position;
    return remaining_from >= self.input.len;
}
/// Bounds check for lookahead: true exactly when `byteAt(n)` would read
/// inside `input`, i.e. when the byte `n` positions past the current one
/// exists.
fn hasAtLeast(self: *const Tokenizer, n: usize) bool {
    const lookahead_index = self.position + n;
    return lookahead_index < self.input.len;
}
/// Returns the byte `offset` positions past the current one. The caller is
/// responsible for checking bounds first (see `isEof` / `hasAtLeast`).
fn byteAt(self: *const Tokenizer, offset: usize) u8 {
    const index = self.position + offset;
    return self.input[index];
}
/// Advances `position` past any run of spaces, tabs, newlines and carriage
/// returns. Stops at the first other byte or at the end of input.
fn skipWhitespace(self: *Tokenizer) void {
    while (self.position < self.input.len) : (self.position += 1) {
        const c = self.input[self.position];
        const is_space = c == ' ' or c == '\t' or c == '\n' or c == '\r';
        if (!is_space) break;
    }
}
/// True for bytes that may begin a name: ASCII letters and underscore.
fn isNameStart(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '_' => true,
        else => false,
    };
}
/// True for bytes that may appear after the first byte of a name: anything
/// `isNameStart` accepts, plus ASCII digits, `-` and `.`.
fn isNameContinue(c: u8) bool {
    return switch (c) {
        '0'...'9', '-', '.' => true,
        else => isNameStart(c),
    };
}
/// Consumes a quoted string literal. `position` must be on the opening
/// quote, and `quote` is that quote byte. The returned payload is the raw
/// text between the quotes, with no escape processing.
fn consumeString(self: *Tokenizer, quote: u8) Token {
    const body_start = self.position + 1; // step over the opening quote
    const rest = self.input[body_start..];

    // Length of the literal body: up to the matching quote, or everything
    // that is left when the literal is unterminated (polyfill parity).
    const body_len = std.mem.indexOfScalar(u8, rest, quote) orelse rest.len;
    const terminated = body_len < rest.len;

    // Step over the closing quote only when there is one.
    self.position = body_start + body_len + @intFromBool(terminated);
    return .{ .string = rest[0..body_len] };
}
/// Consumes a numeric literal: a run of digits, optionally followed by `.`
/// and another (possibly empty) run of digits. A leading `.` with digits
/// after it is consumed through the same optional-fraction branch.
fn consumeNumber(self: *Tokenizer) Token {
    const begin = self.position;
    var end = begin;

    // Integer part (empty for the `.5` form).
    while (end < self.input.len and std.ascii.isDigit(self.input[end])) : (end += 1) {}

    // Optional fraction: the dot, then any digits after it.
    if (end < self.input.len and self.input[end] == '.') {
        end += 1;
        while (end < self.input.len and std.ascii.isDigit(self.input[end])) : (end += 1) {}
    }
    self.position = end;

    // `next()` only dispatches here on a digit or on `.digit`, so the text
    // is always `\d+(\.\d*)?` or `\.\d+`, both of which parseFloat accepts
    // (verified against Zig 0.15.2 per the original author's note).
    const text = self.input[begin..end];
    const parsed = std.fmt.parseFloat(f64, text) catch unreachable;
    return .{ .number = parsed };
}
/// Advances `position` over a (possibly empty) run of name-continue bytes.
fn skipNameChars(self: *Tokenizer) void {
    while (!self.isEof() and isNameContinue(self.byteAt(0))) {
        self.position += 1;
    }
}

/// Consumes a name token starting at `position`. A single `:` directly
/// after the name introduces a namespace-qualified form (`prefix:local` or
/// `prefix:*`), which is returned verbatim, colon included.
fn consumeName(self: *Tokenizer) Token {
    const begin = self.position;
    self.skipNameChars();

    // A lone `:` is a namespace separator. `::` is the axis separator and
    // is left in place for the next token.
    const at_colon = !self.isEof() and self.byteAt(0) == ':';
    const lone_colon = at_colon and (!self.hasAtLeast(1) or self.byteAt(1) != ':');
    if (lone_colon) {
        self.position += 1; // the `:` itself
        const wildcard = !self.isEof() and self.byteAt(0) == '*';
        if (wildcard) {
            self.position += 1;
        } else {
            self.skipNameChars();
        }
    }

    return .{ .name = self.input[begin..self.position] };
}
/// Returns the next token, skipping whitespace and any byte that cannot
/// start a token. Never fails: at end of input it returns `.eof`, and keeps
/// doing so on every later call.
pub fn next(self: *Tokenizer) Token {
    // Two-character operators. No two entries share a first byte, so at
    // most one entry can match at any position.
    const Pair = struct { first: u8, second: u8, token: Token };
    const two_char = [_]Pair{
        .{ .first = '/', .second = '/', .token = .double_slash },
        .{ .first = ':', .second = ':', .token = .double_colon },
        .{ .first = '!', .second = '=', .token = .neq },
        .{ .first = '<', .second = '=', .token = .lte },
        .{ .first = '>', .second = '=', .token = .gte },
        .{ .first = '.', .second = '.', .token = .double_dot },
    };

    while (true) {
        self.skipWhitespace();
        if (self.isEof()) return .eof;

        const head = self.byteAt(0);
        const peek: ?u8 = if (self.hasAtLeast(1)) self.byteAt(1) else null;

        // Literals that are recognisable from their first byte alone.
        switch (head) {
            '"', '\'' => return self.consumeString(head),
            '0'...'9' => return self.consumeNumber(),
            else => {},
        }

        if (peek) |following| {
            // `.5`-style numbers take priority over the `.` / `..` tokens.
            if (head == '.' and std.ascii.isDigit(following)) {
                return self.consumeNumber();
            }
            for (two_char) |pair| {
                if (pair.first == head and pair.second == following) {
                    self.position += 2;
                    return pair.token;
                }
            }
        }

        // Name-start bytes are disjoint from all punctuation handled below.
        if (isNameStart(head)) return self.consumeName();

        const single: Token = switch (head) {
            '(' => .lparen,
            ')' => .rparen,
            '[' => .lbracket,
            ']' => .rbracket,
            ',' => .comma,
            '|' => .pipe,
            '=' => .eq,
            '<' => .lt,
            '>' => .gt,
            '+' => .plus,
            '-' => .minus,
            '*' => .star,
            '$' => .dollar,
            '/' => .slash,
            '@' => .at,
            '.' => .dot,
            else => {
                // Polyfill parity (decision #2): unknown characters are
                // silently skipped, never an error.
                self.position += 1;
                continue;
            },
        };
        self.position += 1;
        return single;
    }
}
const testing = std.testing;
/// Test helper: tokenizes `input` and asserts that the tokens produced are
/// exactly `expected`, in order, followed by end of input.
///
/// The final `.eof` check means a test that leaves `.eof` off its expected
/// list cannot silently accept trailing tokens. Lists that already end in
/// `.eof` are unaffected, because `.eof` is idempotent.
fn expectTokens(input: []const u8, expected: []const Token) !void {
    var tokenizer = Tokenizer{ .input = input };
    for (expected) |want| {
        const got = tokenizer.next();
        try testing.expectEqualDeep(want, got);
    }
    // Nothing may remain once every expected token has been seen.
    try testing.expectEqualDeep(@as(Token, .eof), tokenizer.next());
}
test "XPath.Tokenizer: empty input emits EOF" {
    // Nothing to read: the very first token is already `.eof`.
    const expected = [_]Token{.eof};
    try expectTokens("", &expected);
}
test "XPath.Tokenizer: only whitespace emits EOF" {
    // Every whitespace byte the tokenizer recognises, and nothing else.
    const input = " \t\n\r ";
    const expected = [_]Token{.eof};
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: EOF idempotent past end" {
    var tokenizer = Tokenizer{ .input = "" };
    // Calling `next()` again after the end must keep yielding `.eof`.
    for (0..3) |_| {
        try testing.expectEqual(Token.eof, tokenizer.next());
    }
}
test "XPath.Tokenizer: single-char operators" {
    // One token per input byte, in the same order as the input.
    const input = "()[],|=<>+-*$/@.";
    const expected = [_]Token{
        .lparen, .rparen, .lbracket, .rbracket,
        .comma,  .pipe,   .eq,       .lt,
        .gt,     .plus,   .minus,    .star,
        .dollar, .slash,  .at,       .dot,
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: two-char operators" {
    // Each pair must come out as a single token, never as two.
    const input = "// :: != <= >= ..";
    const expected = [_]Token{
        .double_slash,
        .double_colon,
        .neq,
        .lte,
        .gte,
        .double_dot,
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: two-char vs single-char disambiguation" {
    // The same leading byte must yield the one-char or the two-char token
    // depending on the byte that follows it.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "/a/b", .expected = &.{ .slash, .{ .name = "a" }, .slash, .{ .name = "b" }, .eof } },
        .{ .input = "//a", .expected = &.{ .double_slash, .{ .name = "a" }, .eof } },
        .{ .input = "a<b", .expected = &.{ .{ .name = "a" }, .lt, .{ .name = "b" }, .eof } },
        .{ .input = "a<=b", .expected = &.{ .{ .name = "a" }, .lte, .{ .name = "b" }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: string literal double quote" {
    // The surrounding double quotes are stripped from the payload.
    const input = "\"hello world\"";
    const expected = [_]Token{ .{ .string = "hello world" }, .eof };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: string literal single quote" {
    // The surrounding single quotes are stripped from the payload.
    const input = "'hello world'";
    const expected = [_]Token{ .{ .string = "hello world" }, .eof };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: string embeds the other quote type" {
    // Only the opening quote character terminates the literal; the other
    // quote type passes through as ordinary content.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "\"it's\"", .expected = &.{ .{ .string = "it's" }, .eof } },
        .{ .input = "'say \"hi\"'", .expected = &.{ .{ .string = "say \"hi\"" }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: empty string literal" {
    // Adjacent quotes of either type produce an empty payload.
    const expected = [_]Token{ .{ .string = "" }, .eof };
    for ([_][]const u8{ "''", "\"\"" }) |input| {
        try expectTokens(input, &expected);
    }
}
test "XPath.Tokenizer: unterminated string emits partial — polyfill parity" {
    // A missing closing quote is not an error; everything up to the end of
    // input becomes the payload.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "'unterminated", .expected = &.{ .{ .string = "unterminated" }, .eof } },
        .{ .input = "\"oops", .expected = &.{ .{ .string = "oops" }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: integer literals" {
    // Plain digit runs become `number` tokens holding the same value.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "0", .expected = &.{ .{ .number = 0 }, .eof } },
        .{ .input = "42", .expected = &.{ .{ .number = 42 }, .eof } },
        .{ .input = "12345", .expected = &.{ .{ .number = 12345 }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: float literals" {
    // Digits on both sides of the decimal point.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "3.14", .expected = &.{ .{ .number = 3.14 }, .eof } },
        .{ .input = "0.5", .expected = &.{ .{ .number = 0.5 }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: leading-dot float (.5)" {
    // A dot directly followed by a digit starts a number, not a `.dot` token.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = ".5", .expected = &.{ .{ .number = 0.5 }, .eof } },
        .{ .input = ".25", .expected = &.{ .{ .number = 0.25 }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: trailing-dot float (5.)" {
    // The trailing dot is absorbed into the number rather than emitted
    // as a separate `.dot` token.
    const input = "5.";
    const expected = [_]Token{ .{ .number = 5 }, .eof };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: leading zeros are decimal, not octal" {
    // Leading zeros must not switch the literal to another base.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "007", .expected = &.{ .{ .number = 7 }, .eof } },
        .{ .input = "0042", .expected = &.{ .{ .number = 42 }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: multi-digit fraction parses with parseFloat precision" {
    // Pins that the literal text goes through parseFloat (the polyfill
    // calls Number()). An earlier hand-rolled `place *= 0.1` accumulator
    // drifted on long fractions.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "0.123456789", .expected = &.{ .{ .number = 0.123456789 }, .eof } },
        .{ .input = "123.456", .expected = &.{ .{ .number = 123.456 }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: dot followed by non-digit emits dot token" {
    // Only a digit directly after the dot makes it part of a number;
    // a name, end of input, or whitespace in between does not.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = ".x", .expected = &.{ .dot, .{ .name = "x" }, .eof } },
        .{ .input = ".", .expected = &.{ .dot, .eof } },
        .{ .input = ". 3", .expected = &.{ .dot, .{ .number = 3 }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: bare identifier" {
    // Names come back verbatim, with their case preserved.
    for ([_][]const u8{ "foo", "_x", "MixedCase" }) |ident| {
        try expectTokens(ident, &.{ .{ .name = ident }, .eof });
    }
}
test "XPath.Tokenizer: identifier with digits, dashes, dots" {
    // Digits, `-` and `.` are all valid after the first byte of a name.
    for ([_][]const u8{ "foo-bar", "foo.bar", "a1b2" }) |ident| {
        try expectTokens(ident, &.{ .{ .name = ident }, .eof });
    }
}
test "XPath.Tokenizer: namespace-prefixed name" {
    // The prefix, the colon and the local part (or `*`) form one name token.
    for ([_][]const u8{ "xhtml:div", "svg:*" }) |qname| {
        try expectTokens(qname, &.{ .{ .name = qname }, .eof });
    }
}
test "XPath.Tokenizer: name followed by `::` keeps the colon for the axis token" {
    // `::` must not be mistaken for a namespace separator.
    const input = "child::node";
    const expected = [_]Token{
        .{ .name = "child" },
        .double_colon,
        .{ .name = "node" },
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: name immediately followed by `(` is two tokens" {
    // The tokenizer does not recognise calls; that is the parser's job.
    const input = "count()";
    const expected = [_]Token{
        .{ .name = "count" },
        .lparen,
        .rparen,
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: keywords or/and/div/mod tokenize as plain names" {
    // Operator keywords get no dedicated token; they arrive as `name`.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "a or b", .expected = &.{ .{ .name = "a" }, .{ .name = "or" }, .{ .name = "b" }, .eof } },
        .{ .input = "3 div 4", .expected = &.{ .{ .number = 3 }, .{ .name = "div" }, .{ .number = 4 }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: unknown character silently skipped" {
    // `?` is not part of the token alphabet. It is dropped, and it also
    // ends the name that precedes it.
    const Case = struct { input: []const u8, expected: []const Token };
    const cases = [_]Case{
        .{ .input = "?foo", .expected = &.{ .{ .name = "foo" }, .eof } },
        .{ .input = "foo?bar", .expected = &.{ .{ .name = "foo" }, .{ .name = "bar" }, .eof } },
    };
    for (cases) |case| {
        try expectTokens(case.input, case.expected);
    }
}
test "XPath.Tokenizer: representative path expression" {
    const input = "//div[@class='x']/p[2]";
    const expected = [_]Token{
        // //div
        .double_slash,
        .{ .name = "div" },
        // [@class='x']
        .lbracket,
        .at,
        .{ .name = "class" },
        .eq,
        .{ .string = "x" },
        .rbracket,
        // /p
        .slash,
        .{ .name = "p" },
        // [2]
        .lbracket,
        .{ .number = 2 },
        .rbracket,
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: representative axis + predicate expression" {
    const input = "ancestor-or-self::section/following-sibling::*[position()<=last()-1]";
    const expected = [_]Token{
        // ancestor-or-self::section
        .{ .name = "ancestor-or-self" },
        .double_colon,
        .{ .name = "section" },
        .slash,
        // following-sibling::*
        .{ .name = "following-sibling" },
        .double_colon,
        .star,
        // [position()<=last()-1]
        .lbracket,
        .{ .name = "position" },
        .lparen,
        .rparen,
        .lte,
        .{ .name = "last" },
        .lparen,
        .rparen,
        .minus,
        .{ .number = 1 },
        .rbracket,
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: parent-axis abbreviation" {
    // `..` is one token, followed by an ordinary slash and name.
    const input = "../foo";
    const expected = [_]Token{
        .double_dot,
        .slash,
        .{ .name = "foo" },
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: filter expression with predicate" {
    const input = "(//a)[1]";
    const expected = [_]Token{
        // (//a)
        .lparen,
        .double_slash,
        .{ .name = "a" },
        .rparen,
        // [1]
        .lbracket,
        .{ .number = 1 },
        .rbracket,
        .eof,
    };
    try expectTokens(input, &expected);
}
test "XPath.Tokenizer: variable reference" {
    // `$` is its own token; the variable name follows as a plain `name`.
    const input = "$x + 1";
    const expected = [_]Token{
        .dollar,
        .{ .name = "x" },
        .plus,
        .{ .number = 1 },
        .eof,
    };
    try expectTokens(input, &expected);
}

133
src/browser/xpath/ast.zig Normal file
View File

@@ -0,0 +1,133 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! XPath 1.0 AST.
//!
//! Slices and pointers are arena-owned by the Parser; the AST has no
//! destructor.
/// Root AST node type: one variant per expression shape the parser
/// produces. Child expressions are held by pointer.
pub const Expr = union(enum) {
/// Absolute or relative location path: `/foo/bar`, `//x`, `foo/bar`.
path: Path,
/// Filter expression followed by a location-path tail:
/// `(//a)/b`, `(expr)//c`.
filter_path: FilterPath,
/// Filter expression with a single predicate: `(expr)[n]`.
/// Multi-predicate filters nest: `(e)[1][2]` → filter(filter(e,1),2).
filter: Filter,
/// Binary operation; see `BinOpKind` for the operators covered.
binop: BinOp,
/// Unary minus. The polyfill has no unary `+`.
neg: *Expr,
/// String literal, quotes stripped.
literal: []const u8,
/// Numeric literal, parsed to f64.
number: f64,
/// Variable reference. The leading `$` is stripped; per decision #3
/// the evaluator always returns the empty string.
var_ref: []const u8,
/// Function call: name plus argument expressions.
fn_call: FnCall,
};
/// Location path: an ordered list of steps plus the absolute/relative flag.
pub const Path = struct {
/// True when the path starts with `/` or `//`.
absolute: bool,
/// Steps in source order. Empty for a bare `/`; a leading `//` adds a
/// `descendant-or-self::node()` step in front of the written steps.
steps: []const Step,
};
/// A filter expression continued by location steps, e.g. `(//a)/b`.
pub const FilterPath = struct {
/// The leading expression the tail steps are applied to.
filter: *Expr,
/// Tail steps in source order; a `//` separator adds a
/// descendant-or-self step ahead of the step that follows it.
steps: []const Step,
};
/// An expression with exactly one predicate applied: `(expr)[pred]`.
pub const Filter = struct {
/// The expression being filtered; itself a `filter` when predicates chain.
expr: *Expr,
/// The predicate expression between the brackets.
predicate: *Expr,
};
/// Binary operation node: an operator and its two operands.
pub const BinOp = struct {
/// Which operator this node represents.
op: BinOpKind,
/// Left-hand operand.
left: *Expr,
/// Right-hand operand.
right: *Expr,
};
/// Operators representable by `BinOp`. The trailing underscore on `or_`,
/// `and_` and `union_` avoids Zig keywords.
pub const BinOpKind = enum {
// Logical: `or`, `and`.
or_,
and_,
// Comparison: `=`, `!=`, `<`, `>`, `<=`, `>=`.
eq,
neq,
lt,
gt,
lte,
gte,
// Arithmetic: `+`, `-`, `*`, `div`, `mod`.
add,
sub,
mul,
div,
mod,
// Node-set union: `|`.
union_,
};
/// Function call node, e.g. `substring('abc', 2, 1)`.
pub const FnCall = struct {
/// Function name as written in the expression.
name: []const u8,
/// Argument expressions in source order.
args: []const *Expr,
};
/// One location step: axis, node test, and any number of predicates.
pub const Step = struct {
/// Axis the step selects along; an unqualified name step uses `child`.
axis: Axis,
/// Name or node-type test applied to nodes on the axis.
node_test: NodeTest,
/// Predicates in source order; `p[1][@x]` yields two entries.
predicates: []const *Expr,
};
/// Axis of a location step. Variant names mirror the XPath axis names with
/// `-` replaced by `_`; `.`, `..` and `@` parse to `self`, `parent` and
/// `attribute` respectively.
pub const Axis = enum {
child,
descendant,
descendant_or_self,
self,
parent,
ancestor,
ancestor_or_self,
following_sibling,
preceding_sibling,
following,
preceding,
attribute,
namespace,
/// Polyfill parity (decision #2): unknown axis names parse to
/// this variant; the evaluator returns an empty node-set.
unknown,
};
/// The test a step applies to candidate nodes: either a name test or a
/// node-type test.
pub const NodeTest = union(enum) {
/// Element / attribute name. `"*"` is the wildcard. Namespaced forms
/// (`prefix:*`, `prefix:local`) are stored verbatim — the evaluator
/// does not split them, so they fall through to a literal `mem.eql`
/// against the node name (consistent with the `namespace::` axis stub
/// per decision #3).
/// TODO: real namespace support if the polyfill ever drops the stub.
name: []const u8,
/// `node()`, `text()`, `comment()`, `processing-instruction()`.
/// The optional target literal of `processing-instruction("foo")`
/// is consumed but not stored (decision #3 stub).
type_test: TypeTest,
};
/// Node-type tests usable in a step.
pub const TypeTest = enum {
// `node()`
node,
// `text()`
text,
// `comment()`
comment,
// `processing-instruction()`, with or without a target literal
processing_instruction,
};

View File

@@ -0,0 +1,630 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! XPath 1.0 core function library — 25 functions covering the spec's
//! core function set. `position()` and `last()` live in
//! `Evaluator.evalFnCall` because they need the `(pos, size)` closure
//! that this module never sees.
//!
//! Args are pre-evaluated by the caller (`Evaluator.evalFnCall`). Eager
//! evaluation is fine here — short-circuit operators (`or`/`and`) are
//! binops, not function calls, so laziness isn't required. The
//! pre-evaluation contract also keeps functions.zig free of a circular
//! import on Evaluator.zig.
//!
//! Stubs per decision #3:
//! - `lang(string)` → always false
//! - `namespace-uri(...)` → always ""
//! - `name`/`local-name` → lowercased (HTML pragmatism)
//!
//! Allocations land in the caller's per-evaluation arena.
const std = @import("std");
const lp = @import("lightpanda");
const Node = @import("../webapi/Node.zig");
const result = @import("result.zig");
const Frame = lp.Frame;
const Element = Node.Element;
const Document = Node.Document;
const Allocator = std.mem.Allocator;
/// Errors `call` can return.
pub const Error = error{
// Allocation in the caller's per-evaluation arena failed.
OutOfMemory,
// NOTE(review): presumably surfaced by the `std.Io.Writer` writes used when
// building strings (e.g. in `idFn`) — confirm.
WriteFailed,
// NOTE(review): origin not visible in this part of the file — presumably
// raised by the string conversion helpers in result.zig; confirm.
StringTooLarge,
// `call` was given a name that is not in the core function library.
UnknownFunction,
};
/// Entry point of the core function library: looks `name` up among the 25
/// supported functions and runs it on the already-evaluated `args`.
///
/// Returns `error.UnknownFunction` for any other name. The Evaluator
/// handles `position()` / `last()` itself before calling in here, so this
/// is the last lookup stop.
pub fn call(
    arena: Allocator,
    name: []const u8,
    args: []const result.Result,
    ctx: *Node,
    frame: *Frame,
) Error!result.Result {
    // Field names are the XPath function names verbatim, so the lookup is
    // an exact, case-sensitive string match.
    const Builtin = enum {
        count,
        id,
        @"local-name",
        name,
        @"namespace-uri",
        string,
        concat,
        @"starts-with",
        contains,
        @"substring-before",
        @"substring-after",
        substring,
        @"string-length",
        @"normalize-space",
        translate,
        boolean,
        not,
        @"true",
        @"false",
        lang,
        number,
        sum,
        floor,
        ceiling,
        round,
    };

    const which = std.meta.stringToEnum(Builtin, name) orelse return error.UnknownFunction;

    switch (which) {
        // -- Node-set --
        .count => return .{ .number = countFn(args) },
        .id => return idFn(arena, args, ctx, frame),
        .@"local-name" => return .{ .string = try localNameFn(arena, args, ctx) },
        .name => return .{ .string = try nameFn(arena, args, ctx) },
        // Stub: namespace URIs are always reported as empty.
        .@"namespace-uri" => return .{ .string = "" },

        // -- String --
        .string => return .{ .string = try stringFn(arena, args, ctx) },
        .concat => return .{ .string = try concatFn(arena, args) },
        .@"starts-with" => return .{ .boolean = try startsWithFn(arena, args) },
        .contains => return .{ .boolean = try containsFn(arena, args) },
        .@"substring-before" => return .{ .string = try substringBeforeFn(arena, args) },
        .@"substring-after" => return .{ .string = try substringAfterFn(arena, args) },
        .substring => return .{ .string = try substringFn(arena, args) },
        .@"string-length" => return .{ .number = try stringLengthFn(arena, args, ctx) },
        .@"normalize-space" => return .{ .string = try normalizeSpaceFn(arena, args, ctx) },
        .translate => return .{ .string = try translateFn(arena, args) },

        // -- Boolean --
        .boolean => {
            if (args.len == 0) return .{ .boolean = false };
            return .{ .boolean = result.toBoolean(args[0]) };
        },
        .not => {
            if (args.len == 0) return .{ .boolean = true };
            return .{ .boolean = !result.toBoolean(args[0]) };
        },
        .@"true" => return .{ .boolean = true },
        .@"false" => return .{ .boolean = false },
        // Stub: lang() never matches.
        .lang => return .{ .boolean = false },

        // -- Number --
        .number => return .{ .number = try numberFn(arena, args, ctx) },
        .sum => return .{ .number = try sumFn(arena, args) },
        // With no argument, the rounding functions yield NaN.
        .floor => {
            if (args.len == 0) return .{ .number = std.math.nan(f64) };
            return .{ .number = std.math.floor(try result.toNumber(arena, args[0])) };
        },
        .ceiling => {
            if (args.len == 0) return .{ .number = std.math.nan(f64) };
            return .{ .number = std.math.ceil(try result.toNumber(arena, args[0])) };
        },
        .round => {
            if (args.len == 0) return .{ .number = std.math.nan(f64) };
            return .{ .number = roundHalfToPosInf(try result.toNumber(arena, args[0])) };
        },
    }
}
/// Byte-wise equality of two slices; shorthand for the dispatcher's
/// function-name comparisons.
inline fn eql(lhs: []const u8, rhs: []const u8) bool {
    return std.mem.eql(u8, lhs, rhs);
}
// ----- node-set fns -----
/// `count(node-set)`: number of nodes in the first argument. A missing or
/// non-node-set argument counts as 0.
fn countFn(args: []const result.Result) f64 {
    if (args.len == 0) return 0;
    return switch (args[0]) {
        .node_set => |nodes| @as(f64, @floatFromInt(nodes.len)),
        else => 0,
    };
}
/// `id(object)`: looks up elements by ID in the document that owns `ctx`.
///
/// A node-set argument contributes the string-value of every node, joined by
/// single spaces; any other argument is coerced with `result.toString`. The
/// resulting string is split on ASCII whitespace and each token is resolved
/// with `getElementById`. Duplicate hits are collapsed; the returned slice
/// keeps first-seen order.
fn idFn(arena: Allocator, args: []const result.Result, ctx: *Node, frame: *Frame) Error!result.Result {
    const empty: result.Result = .{ .node_set = &.{} };
    if (args.len == 0) return empty;

    const id_list: []const u8 = switch (args[0]) {
        .node_set => |nodes| joined: {
            var out = std.Io.Writer.Allocating.init(arena);
            for (nodes, 0..) |node, index| {
                if (index != 0) try out.writer.writeByte(' ');
                const text = try result.stringValueOf(arena, node);
                try out.writer.writeAll(text);
            }
            break :joined out.written();
        },
        else => try result.toString(arena, args[0]),
    };

    // Equivalent of `ctx.ownerDocument || ctx`: a document node is its own
    // lookup root. Any other node without an owner document matches nothing.
    const doc = ctx.ownerDocument(frame) orelse (ctx.is(Document) orelse return empty);

    // Insertion-ordered set so repeated IDs yield a single node.
    var found: std.AutoArrayHashMapUnmanaged(*Node, void) = .empty;
    var tokens = std.mem.tokenizeAny(u8, id_list, &std.ascii.whitespace);
    while (tokens.next()) |token| {
        const element = doc.getElementById(token, frame) orelse continue;
        try found.put(arena, element.asNode(), {});
    }
    return .{ .node_set = found.keys() };
}
/// `local-name(node-set?)`: local name of the first node of the argument, or
/// of the context node when called without arguments. Returns "" when the
/// argument is not a node-set or is empty.
fn localNameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const target = firstNodeOrCtx(args, ctx) orelse return "";

    // Elements hand back their own local-name slice; it is returned as-is
    // (borrowed, not copied into the arena).
    if (target.is(Element)) |element| {
        return element.getLocalName();
    }

    // Every other node kind: lowercase copy of its node name.
    var scratch: [256]u8 = undefined;
    const node_name = target.getNodeName(&scratch);
    return std.ascii.allocLowerString(arena, node_name);
}
/// `name(node-set?)`: qualified name of the first node of the argument, or of
/// the context node when called without arguments. Returns "" when the
/// argument is not a node-set or is empty.
fn nameFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const target = firstNodeOrCtx(args, ctx) orelse return "";

    // Only differs from `local-name` for namespaced elements: the lowercase
    // tag name keeps its prefix (`ns:foo`) where `local-name` gives `foo`.
    if (target.is(Element)) |element| {
        return element.getTagNameLower();
    }

    // Every other node kind: lowercase copy of its node name.
    var scratch: [256]u8 = undefined;
    const node_name = target.getNodeName(&scratch);
    return std.ascii.allocLowerString(arena, node_name);
}
/// Picks the node a name-style function operates on: the context node when
/// no argument was passed, otherwise the first node of a node-set argument.
/// Returns null for a non-node-set or empty node-set argument.
fn firstNodeOrCtx(args: []const result.Result, ctx: *Node) ?*Node {
    if (args.len == 0) return ctx;
    return switch (args[0]) {
        .node_set => |nodes| if (nodes.len > 0) nodes[0] else null,
        else => null,
    };
}
// ----- string fns -----
/// `string(object?)`: string coercion of the first argument, or the
/// string-value of the context node when called without arguments.
fn stringFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    if (args.len > 0) {
        return try result.toString(arena, args[0]);
    }
    return try result.stringValueOf(arena, ctx);
}
/// `concat(string, string, ...)`: string coercions of all arguments, appended
/// in order into one arena-backed buffer.
fn concatFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    var out = std.Io.Writer.Allocating.init(arena);
    for (args) |arg| {
        try out.writer.writeAll(try result.toString(arena, arg));
    }
    return out.written();
}
/// `starts-with(string, prefix)`: true when the first argument begins with
/// the second. Fewer than two arguments yields false.
fn startsWithFn(arena: Allocator, args: []const result.Result) Error!bool {
    if (args.len < 2) return false;
    const subject = try result.toString(arena, args[0]);
    const prefix = try result.toString(arena, args[1]);
    if (prefix.len > subject.len) return false;
    return std.mem.eql(u8, subject[0..prefix.len], prefix);
}
/// `contains(string, needle)`: true when the second argument occurs anywhere
/// in the first. Fewer than two arguments yields false.
fn containsFn(arena: Allocator, args: []const result.Result) Error!bool {
    if (args.len < 2) return false;
    const haystack = try result.toString(arena, args[0]);
    const needle = try result.toString(arena, args[1]);
    if (std.mem.indexOf(u8, haystack, needle)) |_| {
        return true;
    }
    return false;
}
/// `substring-before(string, sep)`: the part of the first argument that
/// precedes the first occurrence of `sep`, or "" when `sep` does not occur
/// (or fewer than two arguments were given). The result borrows from the
/// coerced first argument.
fn substringBeforeFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const subject = try result.toString(arena, args[0]);
    const separator = try result.toString(arena, args[1]);
    const at = std.mem.indexOf(u8, subject, separator) orelse return "";
    return subject[0..at];
}
/// `substring-after(string, sep)`: the part of the first argument that
/// follows the first occurrence of `sep`, or "" when `sep` does not occur
/// (or fewer than two arguments were given). The result borrows from the
/// coerced first argument.
fn substringAfterFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const subject = try result.toString(arena, args[0]);
    const separator = try result.toString(arena, args[1]);
    const at = std.mem.indexOf(u8, subject, separator) orelse return "";
    const tail_start = at + separator.len;
    return subject[tail_start..];
}
/// `substring(string, start, length?)` with XPath's 1-based, rounded
/// positions. Indexing is by byte of the coerced string.
///
/// Returns "" when fewer than two arguments are given, when `start` or
/// `length` is NaN, or when the selected range is empty. All range arithmetic
/// is done in f64 and clamped to `[0, len]` before converting to `usize`, so
/// infinities are handled without an illegal float-to-int conversion.
fn substringFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 2) return "";
    const text = try result.toString(arena, args[0]);

    const pos_raw = try result.toNumber(arena, args[1]);
    if (std.math.isNan(pos_raw)) return "";
    const pos = roundHalfToPosInf(pos_raw);

    const text_len: f64 = @floatFromInt(text.len);
    // 1-based position → 0-based offset, never below the start of the string.
    const begin_f = @max(pos - 1, 0);

    // Two-argument form: everything from `begin` to the end.
    if (args.len < 3) {
        if (begin_f >= text_len) return "";
        const begin: usize = @intFromFloat(begin_f);
        return text[begin..];
    }

    const count_raw = try result.toNumber(arena, args[2]);
    if (std.math.isNan(count_raw)) return "";
    const count = roundHalfToPosInf(count_raw);

    // -inf + inf is NaN; @intFromFloat(NaN) is illegal behavior, so bail out.
    const end_unclamped = pos - 1 + count;
    if (std.math.isNan(end_unclamped)) return "";

    const end_f = @min(end_unclamped, text_len);
    if (begin_f >= end_f) return "";
    const begin: usize = @intFromFloat(begin_f);
    const end: usize = @intFromFloat(end_f);
    return text[begin..end];
}
/// `string-length(string?)`: length of the coerced first argument, or of the
/// context node's string-value when called without arguments.
///
/// The length is counted in UTF-8 bytes, whereas the reference polyfill
/// counts UTF-16 code units. The two agree for ASCII input; see
/// .claude/skills/xpath-port/NOTES.md for the rationale behind the divergence.
fn stringLengthFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
    const text = switch (args.len) {
        0 => try result.stringValueOf(arena, ctx),
        else => try result.toString(arena, args[0]),
    };
    return @floatFromInt(text.len);
}
/// `normalize-space(string?)`: strips leading/trailing ASCII whitespace and
/// collapses every internal whitespace run to a single space. Operates on
/// the context node's string-value when called without arguments.
fn normalizeSpaceFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error![]const u8 {
    const text = switch (args.len) {
        0 => try result.stringValueOf(arena, ctx),
        else => try result.toString(arena, args[0]),
    };

    // Splitting on whitespace and re-joining with one space is the same as
    // trim + collapse: empty tokens are never produced.
    var words = std.mem.tokenizeAny(u8, text, &std.ascii.whitespace);
    const first_word = words.next() orelse return "";

    var out = std.Io.Writer.Allocating.init(arena);
    try out.writer.writeAll(first_word);
    while (words.next()) |word| {
        try out.writer.writeByte(' ');
        try out.writer.writeAll(word);
    }
    return out.written();
}
/// `translate(string, from, to)`: byte-wise character mapping. Each byte of
/// the first argument that appears in `from` is replaced by the byte at the
/// same index in `to`; if `to` is shorter, the byte is dropped. Bytes not in
/// `from` are copied unchanged. Fewer than three arguments yields "".
fn translateFn(arena: Allocator, args: []const result.Result) Error![]const u8 {
    if (args.len < 3) return "";
    const subject = try result.toString(arena, args[0]);
    const from = try result.toString(arena, args[1]);
    const to = try result.toString(arena, args[2]);

    var out = std.Io.Writer.Allocating.init(arena);
    for (subject) |byte| {
        // First occurrence in `from` decides the mapping.
        const slot = std.mem.indexOfScalar(u8, from, byte) orelse {
            try out.writer.writeByte(byte);
            continue;
        };
        // No counterpart in `to` → the byte is deleted.
        if (slot >= to.len) continue;
        try out.writer.writeByte(to[slot]);
    }
    return out.written();
}
// ----- number fns -----
/// `number(object?)`: numeric coercion of the first argument, or of the
/// context node's string-value when called without arguments.
fn numberFn(arena: Allocator, args: []const result.Result, ctx: *Node) Error!f64 {
    if (args.len > 0) {
        return try result.toNumber(arena, args[0]);
    }
    const ctx_text = try result.stringValueOf(arena, ctx);
    return result.stringToNumber(ctx_text);
}
/// `sum(node-set)`: adds up the numeric value of every node's string-value.
/// A missing or non-node-set argument yields NaN; an empty node-set yields 0.
fn sumFn(arena: Allocator, args: []const result.Result) Error!f64 {
    if (args.len == 0) return std.math.nan(f64);
    const nodes = switch (args[0]) {
        .node_set => |ns| ns,
        else => return std.math.nan(f64),
    };

    var acc: f64 = 0;
    for (nodes) |node| {
        const text = try result.stringValueOf(arena, node);
        acc += result.stringToNumber(text);
    }
    return acc;
}
/// Round to the nearest integer, with ties going toward positive infinity.
/// Matches JS `Math.round` (the polyfill calls it for both `round()` and
/// `substring()`) and XPath 1.0 `round()`:
///   round(0.5) = 1     round(-0.5) = -0
///   round(1.5) = 2     round(-1.5) = -1
/// Diverges from Zig's `@round` (away from zero): `@round(-0.5) = -1`.
///
/// NaN and ±Infinity are returned unchanged. Inputs in [-0.5, 0) yield
/// negative zero, as both XPath and `Math.round` require; it compares equal
/// to 0.
fn roundHalfToPosInf(n: f64) f64 {
    // `isFinite` is false for NaN as well as ±Infinity.
    if (!std.math.isFinite(n)) return n;

    // Compare the fractional part against 0.5 instead of computing
    // `floor(n + 0.5)`: the addition is inexact for 0.49999999999999994
    // (which would round up to 1) and for odd integers at or above 2^52
    // (which would be bumped to the next integer).
    const lower = @floor(n);
    const rounded = if (n - lower >= 0.5) lower + 1 else lower;

    // Keep the sign of the input on a zero result (e.g. -0.2 → -0).
    if (rounded == 0) return std.math.copysign(rounded, n);
    return rounded;
}
// ---------------------------------------------------------------------
// Tests — pure-logic only. Functions that need a real DOM (id, name,
// local-name, string with element ctx, sum, count of node-set, etc.)
// are exercised via Phase 9 HTML fixtures in tests/xpath/.
// ---------------------------------------------------------------------
const testing = std.testing;
const Tokenizer = @import("Tokenizer.zig");
const Parser = @import("Parser.zig");
const Evaluator = @import("Evaluator.zig");
/// Test helper: parses `src` as an XPath expression and evaluates it,
/// returning the resulting value. All allocations go through `a`.
///
/// Only suitable for expressions that never touch the DOM: the two trailing
/// arguments passed to `Evaluator.evaluate` are fabricated addresses, not
/// real objects, so any expression that dereferences them is invalid here.
fn evalScalar(a: Allocator, src: []const u8) !result.Result {
const expr = try Parser.parse(a, src);
// Synthetic Frame/Node pointers — the public `evaluate` entry only
// touches the Frame for path/axis evaluation. Pure-scalar expressions
// (arithmetic, function calls returning scalars) never deref it.
return Evaluator.evaluate(a, expr, @ptrFromInt(0x2000), @ptrFromInt(0x1000));
}
test "Functions: count() of non-node-set returns 0" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    // A string literal is not a node-set, so count() falls back to 0.
    const got = try evalScalar(arena_state.allocator(), "count('hello')");
    try testing.expect(got == .number);
    try testing.expectEqual(@as(f64, 0), got.number);
}
test "Functions: string() on scalar coerces" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }
    const cases = .{
        .{ "string(42)", "42" },
        .{ "string(3.14)", "3.14" },
        .{ "string(true())", "true" },
        .{ "string(false())", "false" },
        .{ "string('hello')", "hello" },
        .{ "string(0)", "0" },
        .{ "string(-1)", "-1" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
test "Functions: concat() variadic" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string } — two to five arguments, plus a
    // numeric argument that must be coerced.
    const cases = .{
        .{ "concat('a', 'b')", "ab" },
        .{ "concat('a', 'b', 'c')", "abc" },
        .{ "concat('foo', '-', 'bar', '-', 'baz')", "foo-bar-baz" },
        .{ "concat('x', 1, 'y')", "x1y" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
test "Functions: starts-with / contains" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected boolean } — the empty needle always matches.
    const cases = .{
        .{ "starts-with('hello', 'he')", true },
        .{ "starts-with('hello', 'el')", false },
        .{ "starts-with('hello', '')", true },
        .{ "contains('hello world', 'wor')", true },
        .{ "contains('hello', 'xyz')", false },
        .{ "contains('hello', '')", true },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .boolean);
        try testing.expectEqual(case[1], got.boolean);
    }
}
test "Functions: substring-before / substring-after" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string } — a missing separator gives "".
    const cases = .{
        .{ "substring-before('1999/04/01', '/')", "1999" },
        .{ "substring-before('hello', 'xyz')", "" },
        .{ "substring-after('1999/04/01', '/')", "04/01" },
        .{ "substring-after('hello', 'xyz')", "" },
        .{ "substring-after('hello', '')", "hello" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
test "Functions: substring() — XPath 1-based, rounding, NaN handling" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }
    const cases = .{
        .{ "substring('12345', 2, 3)", "234" },
        .{ "substring('12345', 2)", "2345" },
        // Spec example: start rounds 1.5 → 2, length rounds 2.6 → 3.
        .{ "substring('12345', 1.5, 2.6)", "234" },
        // start = 0: begin = max(-1, 0) = 0, end = min(0 - 1 + 3, len) = 2.
        .{ "substring('12345', 0, 3)", "12" },
        // A negative start is clamped to the beginning of the string.
        .{ "substring('12345', -3, 7)", "123" },
        // NaN start.
        .{ "substring('12345', 'foo')", "" },
        // NaN length.
        .{ "substring('12345', 1, 'foo')", "" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
test "Functions: string-length on scalar arg" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected length } — ASCII only, so bytes == characters.
    const cases = .{
        .{ "string-length('hello')", 5 },
        .{ "string-length('')", 0 },
        .{ "string-length('a b c')", 5 },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .number);
        try testing.expectEqual(@as(f64, case[1]), got.number);
    }
}
test "Functions: normalize-space" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string } — covers trimming, collapsing,
    // empty / blank input, and tab / newline separators.
    const cases = .{
        .{ "normalize-space(' hello world ')", "hello world" },
        .{ "normalize-space('hello')", "hello" },
        .{ "normalize-space('')", "" },
        .{ "normalize-space(' ')", "" },
        .{ "normalize-space('a\tb\nc')", "a b c" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
test "Functions: translate" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected string }
    const cases = .{
        // Example from the XPath specification.
        .{ "translate('bar', 'abc', 'ABC')", "BAr" },
        // '-' has no counterpart in `to`, so it is removed.
        .{ "translate('--aaa--', 'abc-', 'ABC')", "AAA" },
        .{ "translate('hello', '', '')", "hello" },
        // Mapping every character to itself changes nothing.
        .{ "translate('abc', 'abc', 'abc')", "abc" },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .string);
        try testing.expectEqualStrings(case[1], got.string);
    }
}
test "Functions: boolean / not / true / false / lang" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected boolean }
    const cases = .{
        .{ "true()", true },
        .{ "false()", false },
        .{ "not(true())", false },
        .{ "not(false())", true },
        .{ "boolean(1)", true },
        .{ "boolean(0)", false },
        .{ "boolean('')", false },
        .{ "boolean('x')", true },
        // lang() is a stub and always answers false.
        .{ "lang('en')", false },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .boolean);
        try testing.expectEqual(case[1], got.boolean);
    }
}
test "Functions: number() on scalar arg" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected number } — string and boolean coercions.
    const exact = .{
        .{ "number('42')", 42 },
        .{ "number(true())", 1 },
        .{ "number(false())", 0 },
    };
    inline for (exact) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expectEqual(@as(f64, case[1]), got.number);
    }

    // A non-numeric string coerces to NaN.
    const not_numeric = try evalScalar(alloc, "number('foo')");
    try testing.expect(std.math.isNan(not_numeric.number));
}
test "Functions: floor / ceiling / round" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // { expression, expected number }
    const cases = .{
        .{ "floor(1.5)", 1 },
        .{ "floor(-1.5)", -2 },
        .{ "floor(0)", 0 },
        .{ "ceiling(1.5)", 2 },
        .{ "ceiling(-1.5)", -1 },
        .{ "ceiling(0)", 0 },
        // round() sends ties toward +infinity (JS Math.round behavior).
        .{ "round(0.5)", 1 },
        .{ "round(-0.5)", 0 },
        .{ "round(1.5)", 2 },
        .{ "round(-1.5)", -1 },
        .{ "round(2.5)", 3 },
    };
    inline for (cases) |case| {
        const got = try evalScalar(alloc, case[0]);
        try testing.expect(got == .number);
        try testing.expectEqual(@as(f64, case[1]), got.number);
    }
}
test "Functions: round/floor/ceiling propagate NaN and Infinity" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // `1 div 0` is +Infinity and must pass through round() untouched.
    const rounded_inf = try evalScalar(alloc, "round(1 div 0)");
    try testing.expect(std.math.isPositiveInf(rounded_inf.number));

    // `0 div 0` is NaN and must pass through all three functions.
    inline for (.{ "round(0 div 0)", "floor(0 div 0)", "ceiling(0 div 0)" }) |src| {
        const got = try evalScalar(alloc, src);
        try testing.expect(std.math.isNan(got.number));
    }
}
test "Functions: sum / count on non-node-set defaults" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // sum() of something that is not a node-set is NaN ...
    const sum_of_string = try evalScalar(alloc, "sum('hello')");
    try testing.expect(std.math.isNan(sum_of_string.number));

    // ... while count() of the same argument is 0.
    const count_of_string = try evalScalar(alloc, "count('hello')");
    try testing.expectEqual(@as(f64, 0), count_of_string.number);
}
test "Functions: roundHalfToPosInf" {
    // { input, expected } — ties go toward +infinity on both sides of zero.
    const ties = [_][2]f64{
        .{ 0.5, 1 },
        .{ -0.5, 0 },
        .{ 1.5, 2 },
        .{ -1.5, -1 },
        .{ 2.5, 3 },
    };
    for (ties) |pair| {
        try testing.expectEqual(pair[1], roundHalfToPosInf(pair[0]));
    }

    // Non-finite inputs are returned unchanged.
    const inf = std.math.inf(f64);
    try testing.expect(std.math.isNan(roundHalfToPosInf(std.math.nan(f64))));
    try testing.expect(std.math.isPositiveInf(roundHalfToPosInf(inf)));
    try testing.expect(std.math.isNegativeInf(roundHalfToPosInf(-inf)));
}

View File

@@ -0,0 +1,199 @@
// Copyright (C) 2023-2026 Lightpanda (Selecy SAS)
//
// Francis Bouvier <francis@lightpanda.io>
// Pierre Tachoire <pierre@lightpanda.io>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//! XPath 1.0 runtime values.
//!
//! Tagged union over the four XPath value types: node-set, number,
//! string, boolean. Type coercion (`toString`, `toNumber`, `toBoolean`)
//! follows XPath 1.0 spec §3, with HTML-pragmatic shortcuts (decision
//! #2).
const std = @import("std");
const Node = @import("../webapi/Node.zig");
const CData = Node.CData;
const Allocator = std.mem.Allocator;
/// A runtime XPath 1.0 value: exactly one of the four XPath value types.
pub const Result = union(enum) {
/// Owned by the evaluator's arena. Order is significant only at the
/// public boundary, where the evaluator sorts to document order.
node_set: []const *Node,
/// XPath number (IEEE 754 double; may be NaN or ±Infinity).
number: f64,
/// XPath string, stored as a byte slice.
string: []const u8,
/// XPath boolean.
boolean: bool,
};
/// String-value of a node (XPath spec §5).
///
/// - Attribute: the attribute's value.
/// - Text / Comment / CDATA / PI (all `.cdata` nodes): the node's data.
/// - Element / Document: text content written by `Node.getTextContent`
///   (concatenated text descendants, without comments and
///   processing-instructions).
/// - DocumentType / DocumentFragment: "" (mirrors the polyfill's
///   `nodeValue || textContent || ''` fallthrough).
///
/// Attribute and cdata results are borrowed from the node (no allocation);
/// element and document results are built in an `arena`-backed buffer.
pub fn stringValueOf(arena: Allocator, node: *Node) error{WriteFailed}![]const u8 {
    switch (node._type) {
        .attribute => |attr| return attr._value.str(),
        .cdata => |cd| return cd._data.str(),
        .document_type, .document_fragment => return "",
        .element, .document => {
            var text = std.Io.Writer.Allocating.init(arena);
            try node.getTextContent(&text.writer);
            return text.written();
        },
    }
}
/// Boolean coercion: a number is true unless it is zero or NaN; a string or
/// node-set is true unless it is empty; a boolean is returned as-is.
pub fn toBoolean(val: Result) bool {
    switch (val) {
        .boolean => |flag| return flag,
        .string => |text| return text.len != 0,
        .node_set => |nodes| return nodes.len != 0,
        .number => |num| {
            // NaN compares unequal to 0, so it needs its own check.
            if (std.math.isNan(num)) return false;
            return num != 0;
        },
    }
}
/// Numeric coercion (XPath spec §4.4).
///
/// - number: returned as-is.
/// - boolean: 1 for true, 0 for false.
/// - string: parsed by `stringToNumber`. Empty and whitespace-only strings
///   are NaN — unlike JS `Number(' ')`, which is 0 — because the polyfill
///   evaluates `s.trim() === '' ? NaN : Number(s)`.
/// - node-set: the string-value of the first node, parsed the same way;
///   NaN for an empty node-set.
pub fn toNumber(arena: Allocator, val: Result) error{WriteFailed}!f64 {
    switch (val) {
        .number => |num| return num,
        .boolean => |flag| return if (flag) 1 else 0,
        .string => |text| return stringToNumber(text),
        .node_set => |nodes| {
            if (nodes.len == 0) return std.math.nan(f64);
            const first_text = try stringValueOf(arena, nodes[0]);
            return stringToNumber(first_text);
        },
    }
}
/// Parses a string as an XPath number. Surrounding ASCII whitespace is
/// ignored; empty, whitespace-only, and unparseable input yields NaN.
///
/// Only decimal spellings are accepted (digits, `.`, a sign, and an
/// exponent). `std.fmt.parseFloat` on its own also accepts `inf`,
/// `infinity`, `nan`, hexadecimal floats and `_` digit separators, none of
/// which XPath treats as a number, so those are rejected up front and
/// return NaN.
pub fn stringToNumber(s: []const u8) f64 {
    const trimmed = std.mem.trim(u8, s, &std.ascii.whitespace);
    if (trimmed.len == 0) return std.math.nan(f64);

    // Reject anything that is not part of a decimal literal before handing
    // the text to the (more permissive) float parser.
    for (trimmed) |c| {
        switch (c) {
            '0'...'9', '.', '+', '-', 'e', 'E' => {},
            else => return std.math.nan(f64),
        }
    }
    return std.fmt.parseFloat(f64, trimmed) catch std.math.nan(f64);
}
/// String coercion.
///
/// - string: returned as-is (borrowed).
/// - boolean: the static string "true" or "false".
/// - number: formatted by `numberToString`.
/// - node-set: string-value of the first node, or "" when the set is empty.
pub fn toString(arena: Allocator, val: Result) error{ OutOfMemory, WriteFailed }![]const u8 {
    switch (val) {
        .string => |text| return text,
        .boolean => |flag| return if (flag) "true" else "false",
        .number => |num| return try numberToString(arena, num),
        .node_set => |nodes| {
            if (nodes.len == 0) return "";
            return try stringValueOf(arena, nodes[0]);
        },
    }
}
/// Formats a number the way XPath spec §4.2 requires.
///
/// NaN and the infinities use the spellings "NaN", "Infinity" and
/// "-Infinity"; both zeros print as "0"; integer-valued numbers print
/// without a trailing `.0`. Zig's default `{d}` float formatting differs on
/// these points (`nan` / `inf`, and a possible `-0`). The special spellings
/// are static strings; everything else is allocated in `arena`.
pub fn numberToString(arena: Allocator, n: f64) error{OutOfMemory}![]const u8 {
    if (std.math.isNan(n)) return "NaN";
    if (std.math.isInf(n)) return if (n > 0) "Infinity" else "-Infinity";
    if (n == 0) return "0"; // +0 and -0 alike

    // Integer-valued numbers within ±9.007199254740992e15 are printed via
    // an i64 so that no fractional part is emitted.
    const int_limit: f64 = 9.007199254740992e15;
    const is_integral = @trunc(n) == n;
    if (is_integral and n >= -int_limit and n <= int_limit) {
        const as_int: i64 = @intFromFloat(n);
        return std.fmt.allocPrint(arena, "{d}", .{as_int});
    }
    return std.fmt.allocPrint(arena, "{d}", .{n});
}
const testing = std.testing;
test "Result: toBoolean" {
    // Values that coerce to true.
    const truthy = [_]Result{
        .{ .boolean = true },
        .{ .number = 1 },
        .{ .string = "x" },
    };
    for (truthy) |value| {
        try testing.expect(toBoolean(value));
    }

    // Values that coerce to false: false, zero, NaN, empty string, empty set.
    const falsy = [_]Result{
        .{ .boolean = false },
        .{ .number = 0 },
        .{ .number = std.math.nan(f64) },
        .{ .string = "" },
        .{ .node_set = &.{} },
    };
    for (falsy) |value| {
        try testing.expect(!toBoolean(value));
    }
}
test "Result: stringToNumber" {
    // Parseable input, including surrounding whitespace.
    const Case = struct { text: []const u8, want: f64 };
    const numeric = [_]Case{
        .{ .text = "42", .want = 42 },
        .{ .text = "3.14", .want = 3.14 },
        .{ .text = "-1", .want = -1 },
        .{ .text = " 5 ", .want = 5 },
    };
    for (numeric) |case| {
        try testing.expectEqual(case.want, stringToNumber(case.text));
    }

    // Empty, blank and non-numeric input is NaN.
    const not_numeric = [_][]const u8{ "", " ", "abc" };
    for (not_numeric) |text| {
        try testing.expect(std.math.isNan(stringToNumber(text)));
    }
}
test "Result: numberToString — integers print without decimal" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Integer-valued inputs, including both zeros, never show a fraction.
    const Case = struct { want: []const u8, value: f64 };
    const cases = [_]Case{
        .{ .want = "5", .value = 5 },
        .{ .want = "0", .value = 0 },
        .{ .want = "0", .value = -0.0 },
        .{ .want = "-1", .value = -1 },
        .{ .want = "42", .value = 42.0 },
    };
    for (cases) |case| {
        try testing.expectEqualStrings(case.want, try numberToString(alloc, case.value));
    }
}
test "Result: numberToString — special values" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // NaN and the infinities use the XPath spellings, not Zig's.
    const inf = std.math.inf(f64);
    try testing.expectEqualStrings("NaN", try numberToString(alloc, std.math.nan(f64)));
    try testing.expectEqualStrings("Infinity", try numberToString(alloc, inf));
    try testing.expectEqualStrings("-Infinity", try numberToString(alloc, -inf));
}
test "Result: numberToString — floats" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    // Non-integers keep their fractional digits.
    const Case = struct { want: []const u8, value: f64 };
    for ([_]Case{
        .{ .want = "3.14", .value = 3.14 },
        .{ .want = "0.5", .value = 0.5 },
    }) |case| {
        try testing.expectEqualStrings(case.want, try numberToString(alloc, case.value));
    }
}
test "Result: toString — boolean returns static string" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    const as_true = try toString(alloc, .{ .boolean = true });
    try testing.expectEqualStrings("true", as_true);

    const as_false = try toString(alloc, .{ .boolean = false });
    try testing.expectEqualStrings("false", as_false);
}
test "Result: toString — node-set with empty arr is empty" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    // An empty node-set has no first node, so its string form is "".
    const got = try toString(arena_state.allocator(), .{ .node_set = &.{} });
    try testing.expectEqualStrings("", got);
}
test "Result: toNumber — empty node-set is NaN" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();

    // An empty node-set has no first node to read a number from.
    const got = try toNumber(arena_state.allocator(), .{ .node_set = &.{} });
    try testing.expect(std.math.isNan(got));
}
test "Result: toNumber — boolean coerces to 0/1" {
    var arena_state = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena_state.deinit();
    const alloc = arena_state.allocator();

    const from_true = try toNumber(alloc, .{ .boolean = true });
    try testing.expectEqual(@as(f64, 1), from_true);

    const from_false = try toNumber(alloc, .{ .boolean = false });
    try testing.expectEqual(@as(f64, 0), from_false);
}

View File

@@ -27,6 +27,7 @@ const dump = @import("../../browser/dump.zig");
const js = @import("../../browser/js/js.zig");
const DOMNode = @import("../../browser/webapi/Node.zig");
const Selector = @import("../../browser/webapi/selector/Selector.zig");
const xpath = @import("../../browser/xpath/Evaluator.zig");
const log = lp.log;
const Allocator = std.mem.Allocator;
@@ -91,6 +92,56 @@ fn getDocument(cmd: *CDP.Command) !void {
return cmd.sendResult(.{ .root = bc.nodeWriter(node, .{ .depth = params.depth }) }, .{});
}
// The 13 axis names defined by XPath 1.0. An identifier in front of `::`
// only counts as an axis when it is in this set, so CSS pseudo-elements
// (`a::before`, `div::first-line`) are not mistaken for XPath.
const xpath_axis_names = std.StaticStringMap(void).initComptime(.{
    .{ "child", {} },
    .{ "descendant", {} },
    .{ "descendant-or-self", {} },
    .{ "self", {} },
    .{ "parent", {} },
    .{ "ancestor", {} },
    .{ "ancestor-or-self", {} },
    .{ "following-sibling", {} },
    .{ "preceding-sibling", {} },
    .{ "following", {} },
    .{ "preceding", {} },
    .{ "attribute", {} },
    .{ "namespace", {} },
});

// True for the characters an axis name can consist of: ASCII letters and '-'.
fn isAxisNameChar(c: u8) bool {
    return switch (c) {
        'a'...'z', 'A'...'Z', '-' => true,
        else => false,
    };
}

// Heuristic (decision #2/#9) deciding whether a search query is XPath or
// CSS. A query is XPath when it
//   - starts with `/` or `./`, optionally behind a single `(`, or
//   - contains `<axis>::` where `<axis>` is one of the 13 XPath axis names.
// Everything else is left to the CSS selector engine.
fn isXPathQuery(q: []const u8) bool {
    if (q.len == 0) return false;

    // Leading path operator, looking past one opening parenthesis if present.
    const head = if (q[0] == '(') q[1..] else q;
    if (std.mem.startsWith(u8, head, "/")) return true;
    if (std.mem.startsWith(u8, head, "./")) return true;

    // Examine every `::`: walk back over the run of [a-zA-Z-] directly in
    // front of it and check whether that run is an axis name.
    var search_from: usize = 0;
    while (std.mem.indexOfPos(u8, q, search_from, "::")) |sep| {
        search_from = sep + 1;

        var name_start = sep;
        while (name_start > 0 and isAxisNameChar(q[name_start - 1])) {
            name_start -= 1;
        }
        if (name_start == sep) continue; // nothing identifier-like before `::`
        if (xpath_axis_names.has(q[name_start..sep])) return true;
    }
    return false;
}
// https://chromedevtools.github.io/devtools-protocol/tot/DOM/#method-performSearch
fn performSearch(cmd: *CDP.Command) !void {
const params = (try cmd.params(struct {
@@ -100,15 +151,23 @@ fn performSearch(cmd: *CDP.Command) !void {
const bc = cmd.browser_context orelse return error.BrowserContextNotLoaded;
const frame = bc.session.currentFrame() orelse return error.FrameNotLoaded;
const list = try Selector.querySelectorAll(frame.window._document.asNode(), params.query, frame);
const root = frame.window._document.asNode();
if (isXPathQuery(params.query)) {
const arena = try frame.getArena(.medium, "DOM.performSearch");
defer frame.releaseArena(arena);
const nodes = try xpath.searchAll(arena, root, params.query, frame);
return finishSearch(cmd, bc, nodes);
}
const list = try Selector.querySelectorAll(root, params.query, frame);
defer list.deinit(frame._page);
return finishSearch(cmd, bc, list._nodes);
}
const search = try bc.node_search_list.create(list._nodes);
// dispatch setChildNodesEvents to inform the client of the subpart of node
// tree covering the results.
try dispatchSetChildNodes(cmd, list._nodes);
fn finishSearch(cmd: *CDP.Command, bc: *CDP.BrowserContext, nodes: []const *DOMNode) !void {
const search = try bc.node_search_list.create(nodes);
try dispatchSetChildNodes(cmd, nodes);
return cmd.sendResult(.{
.searchId = search.name,
.resultCount = @as(u32, @intCast(search.node_ids.len)),
@@ -616,6 +675,78 @@ test "cdp.dom: search flow" {
try ctx.expectSentError(-31998, "SearchResultNotFound", .{ .id = 17 });
}
// End-to-end check of DOM.performSearch routing: XPath-shaped queries go to
// the XPath evaluator, everything else to the CSS selector engine. Search
// ids are handed out sequentially ("0", "1", ...) across the five requests.
test "cdp.dom: performSearch with XPath" {
var ctx = try testing.context();
defer ctx.deinit();
_ = try ctx.loadBrowserContext(.{ .id = "BID-A", .url = "cdp/perform_search_xpath.html" });
// Leading `//` → XPath; expects every <p> in the fixture.
try ctx.processMessage(.{
.id = 20,
.method = "DOM.performSearch",
.params = .{ .query = "//p" },
});
try ctx.expectSentResult(.{ .searchId = "0", .resultCount = 3 }, .{ .id = 20 });
// Named axis before `::` → XPath, evaluated from the document root.
try ctx.processMessage(.{
.id = 21,
.method = "DOM.performSearch",
.params = .{ .query = "descendant::p" },
});
try ctx.expectSentResult(.{ .searchId = "1", .resultCount = 3 }, .{ .id = 21 });
// XPath with an attribute predicate.
try ctx.processMessage(.{
.id = 22,
.method = "DOM.performSearch",
.params = .{ .query = "//*[@id='outer']" },
});
try ctx.expectSentResult(.{ .searchId = "2", .resultCount = 1 }, .{ .id = 22 });
// Bare tag name → not XPath-shaped, handled as a CSS selector.
try ctx.processMessage(.{
.id = 23,
.method = "DOM.performSearch",
.params = .{ .query = "p" },
});
try ctx.expectSentResult(.{ .searchId = "3", .resultCount = 3 }, .{ .id = 23 });
// CSS descendant combinator: only the <p> elements inside the <div>.
try ctx.processMessage(.{
.id = 24,
.method = "DOM.performSearch",
.params = .{ .query = "div p" },
});
try ctx.expectSentResult(.{ .searchId = "4", .resultCount = 2 }, .{ .id = 24 });
}
test "cdp.dom: isXPathQuery heuristic" {
    // XPath-shaped queries — one per branch of the heuristic.
    const xpath_like = [_][]const u8{
        "/html",
        "//p",
        ".//foo",
        "(//foo)[1]",
        "(./bar)[2]",
        "descendant::p",
        "ancestor-or-self::*",
        "//*[@id='x']",
    };
    for (xpath_like) |query| {
        try std.testing.expect(isXPathQuery(query));
    }

    // Everything below must be left to the CSS selector path.
    const css_like = [_][]const u8{
        "",
        "p",
        "div p",
        "#main",
        ".cls",
        "[data-x]",
        "(p)", // parens without a path operator
        ".x", // leading dot without '/'
        // Pseudo-elements: the word before `::` is not an XPath axis name.
        "a::before",
        "div::after",
        "p::first-line",
        "input::placeholder",
        // `::` inside an attribute value, with no axis name in front of it.
        "[data-x=\"x::y\"]",
    };
    for (css_like) |query| {
        try std.testing.expect(!isXPathQuery(query));
    }
}
test "cdp.dom: querySelector unknown search id" {
var ctx = try testing.context();
defer ctx.deinit();