mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
xpath: id-lookup fast path + perf benchmark
evalPath recognizes //tag[@id='x'] and .//tag[@id='x'] (plus the //*[@id='x'] wildcard) and serves them via frame.getElementByIdFromNode. ~100-150x speedup on ID lookups (3231us -> 22.6us for //*[@id='target'] in the new benchmark). Falls through to the general path on any deviation (extra step, extra predicate, non-eq operator, or no string literal on either side of `=`). Inherits the same duplicate-ID compromise selector/List.zig ships for querySelector(All): the id-map stores only the first element per ID in document order, which is acceptable because Capybara/Selenium hot paths assume unique IDs. tests/xpath/xpath_perf.html is the 13-query micro-benchmark used to collect the numbers; its batched console.warn output survives test runner interleaving.
This commit is contained in:
171
src/browser/tests/xpath/xpath_perf.html
Normal file
171
src/browser/tests/xpath/xpath_perf.html
Normal file
@@ -0,0 +1,171 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>XPath perf benchmark</title>
|
||||
<script src="../testing.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<!--
|
||||
Micro-benchmark for the XPath evaluator.
|
||||
|
||||
Builds a deterministic DOM (TREE_SIZE = 500), then runs each query
|
||||
a few warmup iterations followed by ITERATIONS timed iterations.
|
||||
Reports mean µs/iter via console.warn (the test harness sets
|
||||
log level to .warn and silently drops info-level lines, so
|
||||
console.log doesn't surface; console.warn does). A snapshotLength
|
||||
mismatch fails the test loudly via testing.fail so a regression in
|
||||
result count can't be hidden by the timing line.
|
||||
|
||||
Run: make test F=xpath_perf
|
||||
Filter: make test F=xpath_perf 2>&1 | grep '\[xpath-perf\]'
|
||||
|
||||
Query shapes target the optimization roadmap:
|
||||
//*[@id='x'] — global ID lookup (fast-path candidate)
|
||||
//tag[@id='x'] — typed ID lookup (fast-path candidate)
|
||||
//tag — pure descendant tag scan
|
||||
//*[@class='x'] — non-ID attribute filter (no fast path)
|
||||
(//tag)[1] / [last()] — early-exit candidates (iterator opt)
|
||||
count(//tag) — early-exit candidate (iterator opt)
|
||||
Plus a few that should NOT change so we can detect regressions.
|
||||
|
||||
Tuning: keep TREE_SIZE × ITERATIONS small enough that the test
|
||||
finishes in <1s on debug builds. Adjust ITERATIONS up if numbers
|
||||
are noisy.
|
||||
-->
|
||||
|
||||
<script id=xpath_perf_setup>
|
||||
{
  // Fixture parameters. The run script reads them back from window.__perf.
  const TREE_SIZE = 500;
  const TAGS = ["div", "span", "p"];
  const CLASSES = ["alpha", "beta", "gamma"];
  const TARGET_INDEX = 250;

  // Publish the construction parameters first; the derived counts are
  // attached below once they have been tallied.
  window.__perf = { TREE_SIZE, TAGS, CLASSES, TARGET_INDEX };

  // Tags repeat every 3 elements and classes every 4, so the two are
  // decorrelated and //div[@class='alpha'] selects a strict subset of //div.
  function tagOf(i) {
    return TAGS[i % TAGS.length];
  }
  function classOf(i) {
    const slot = i % 4;
    return CLASSES[slot % CLASSES.length];
  }

  // Tally the expected result counts up front so the assertions in the run
  // script can look them up instead of re-deriving the formula.
  const tags = {};
  for (const name of TAGS) tags[name] = 0;
  const classes = {};
  for (const name of CLASSES) classes[name] = 0;
  const cross = {};
  for (let i = 0; i < TREE_SIZE; i++) {
    const tag = tagOf(i);
    const cls = classOf(i);
    tags[tag] += 1;
    classes[cls] += 1;
    const key = `${tag}-${cls}`;
    cross[key] = (cross[key] || 0) + 1;
  }
  Object.assign(window.__perf, {
    tags,
    classes,
    cross,
    targetTag: tagOf(TARGET_INDEX),
  });

  // Generate the whole fixture as one HTML string and parse it in a single
  // innerHTML assignment rather than creating elements one at a time.
  const markup = Array.from({ length: TREE_SIZE }, (_, i) => {
    const tag = tagOf(i);
    const id = i === TARGET_INDEX ? "target" : `n${i}`;
    return `<${tag} id="${id}" class="${classOf(i)}">item ${i}</${tag}>`;
  }).join("");

  // <main> is the wrapper because no test query names it, so //div, //span
  // and //p count only the generated children.
  const root = document.createElement("main");
  root.id = "perf_root";
  root.innerHTML = markup;
  document.body.appendChild(root);

  testing.expectEqual(TREE_SIZE, root.children.length);
}
|
||||
</script>
|
||||
|
||||
<script id=xpath_perf_run>
|
||||
{
  const ITERATIONS = 50;
  const WARMUP = 3;
  const { TREE_SIZE, tags, classes, cross, targetTag } = window.__perf;

  // Each case: the XPath to run, the expected result count (null = do not
  // check), and a short label for the report.
  const cases = [
    // --- ID lookups (fast-path #1 candidates) ---
    { xp: "//*[@id='target']", expect: 1, label: "id-any" },
    { xp: `//${targetTag}[@id='target']`, expect: 1, label: "id-typed-hit" },
    { xp: "//div[@id='target']", expect: targetTag === "div" ? 1 : 0, label: "id-typed-miss" },

    // --- Pure tag descendant ---
    { xp: "//div", expect: tags.div, label: "tag-descendant" },
    { xp: "//span", expect: tags.span, label: "tag-descendant-span" },
    { xp: "//*", expect: null, label: "universal-descendant" },

    // --- Attribute equality (no fast path planned) ---
    { xp: "//*[@class='alpha']", expect: classes.alpha, label: "class-eq-any" },
    { xp: "//div[@class='alpha']", expect: cross["div-alpha"] || 0, label: "class-eq-div" },

    // --- Early-exit candidates (iterator optimization) ---
    { xp: "(//div)[1]", expect: 1, label: "first-of-many" },
    { xp: "(//div)[last()]", expect: 1, label: "last-of-many" },

    // --- Functions in predicate (regression guard) ---
    { xp: "//div[contains(@class,'alpha')]", expect: cross["div-alpha"] || 0, label: "contains-class" },
    { xp: "//div[starts-with(@id,'n')]", expect: tags.div - (targetTag === "div" ? 1 : 0), label: "starts-with-id" },

    // --- Counting (number result, iterator early-exit candidate) ---
    { xp: "count(//div)", expect: tags.div, label: "count" },
  ];

  // Left-justify a value in a column of width w (never truncates).
  function pad(s, w) {
    return String(s).padEnd(w);
  }

  // Evaluate xp against the document and reduce the result to one number:
  // the numeric value for number results, the node count for iterator
  // results, or null for any other result type.
  function evalAndCount(xp) {
    const result = document.evaluate(xp, document, null, XPathResult.ANY_TYPE, null);
    switch (result.resultType) {
      case XPathResult.NUMBER_TYPE:
        return result.numberValue;
      case XPathResult.UNORDERED_NODE_ITERATOR_TYPE:
      case XPathResult.ORDERED_NODE_ITERATOR_TYPE: {
        let seen = 0;
        while (result.iterateNext()) seen++;
        return seen;
      }
      default:
        return null;
    }
  }

  // Collect every report line and emit them with one console.warn call.
  // Separate warn calls get interleaved with the test harness's own prints;
  // a single batched call comes through intact.
  const report = [
    `tree_size=${TREE_SIZE} iterations=${ITERATIONS} warmup=${WARMUP}`,
    `${pad("label", 24)} ${pad("count", 8)} ${pad("µs/iter", 10)} xpath`,
  ];

  for (const c of cases) {
    // Warmup runs double as the correctness probe: the last one's count is
    // what gets checked and reported.
    let actual = null;
    for (let w = 0; w < WARMUP; w++) {
      actual = evalAndCount(c.xp);
    }

    const mismatch = c.expect !== null && actual !== c.expect;
    if (mismatch) {
      testing.fail(`[xpath-perf] ${c.label} returned ${actual}, expected ${c.expect}`);
    }

    // Time the full document.evaluate call. ANY_TYPE so we don't pay
    // snapshot allocation we wouldn't pay in the realistic
    // DOM.performSearch path either.
    const startedAt = performance.now();
    for (let n = 0; n < ITERATIONS; n++) {
      document.evaluate(c.xp, document, null, XPathResult.ANY_TYPE, null);
    }
    const elapsedMs = performance.now() - startedAt;
    const microsPerIter = (elapsedMs * 1000) / ITERATIONS;

    report.push(`${pad(c.label, 24)} ${pad(actual ?? "-", 8)} ${pad(microsPerIter.toFixed(1), 10)} ${c.xp}`);
  }

  console.warn("[xpath-perf]\n" + report.map((line) => "[xpath-perf] " + line).join("\n"));

  // Unconditional assertion so the file always records one expectation.
  testing.expectEqual(true, true);
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -281,3 +281,7 @@ test "WebApi: XPathResult" {
|
||||
// Hands the XPath conformance fixture to the HTML test runner.
test "WebApi: XPath conformance" {
    const fixture = "xpath/xpath_conformance.html";
    try testing.htmlRunner(fixture, .{});
}
|
||||
|
||||
// Hands the XPath micro-benchmark fixture to the HTML test runner. The
// fixture reports timings via console.warn and fails on a result-count
// mismatch.
test "WebApi: XPath perf" {
    const fixture = "xpath/xpath_perf.html";
    try testing.htmlRunner(fixture, .{});
}
|
||||
|
||||
@@ -108,6 +108,8 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz
|
||||
}
|
||||
|
||||
fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result {
|
||||
if (try self.tryIdLookupFastPath(path, ctx)) |result| return result;
|
||||
|
||||
const start: *Node = if (path.absolute) blk: {
|
||||
if (ctx._type == .document) break :blk ctx;
|
||||
const owner = ctx.ownerDocument(self.frame) orelse break :blk ctx;
|
||||
@@ -125,6 +127,125 @@ fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result {
|
||||
return .{ .node_set = current_set };
|
||||
}
|
||||
|
||||
// Recognize the very common `//tag[@id='x']` and `.//tag[@id='x']`
// shapes (and their wildcard `//*[@id='x']` variants) and serve them
// directly from `frame.getElementByIdFromNode`. Accepts the literal on
// either side of `=`.
//
// Returns:
//   - `null` when the path is not eligible; the caller must then run the
//     general evaluator.
//   - a node-set result (empty or holding exactly one node) otherwise.
//
// Mirrors the same tradeoff `webapi/selector/List.zig:optimizeSelector`
// already makes for `querySelector(All)`: the id-map only stores the
// first element per ID in document order, so duplicate IDs (invalid
// HTML, but possible) yield one match here where a strict tree walk
// would find all. Acceptable because Capybara/Selenium hot paths
// assume unique IDs and CSS has shipped this compromise for years.
//
// Falls through to the general path for any deviation: extra steps,
// extra predicates, non-eq predicate, no string literal on either side
// of `=`, an empty id literal, the id element being the search root
// itself, or the inability to resolve a search root.
fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result {
    // Two acceptable AST shapes:
    //   //tag[@id='x']   parses to: ds::node() / child::tag[pred]
    //   .//tag[@id='x']  parses to: self::node() / ds::node() / child::tag[pred]
    const target: Ast.Step = switch (path.steps.len) {
        2 => blk: {
            if (!isDescendantOrSelfNode(path.steps[0])) return null;
            break :blk path.steps[1];
        },
        3 => blk: {
            if (!isSelfNode(path.steps[0])) return null;
            if (!isDescendantOrSelfNode(path.steps[1])) return null;
            break :blk path.steps[2];
        },
        else => return null,
    };

    if (target.axis != .child) return null;
    if (target.predicates.len != 1) return null;

    // Tag name (null = wildcard "*"). type_test (e.g. `node()`,
    // `text()`) doesn't qualify because getElementByIdFromNode only
    // returns elements.
    const tag_name: ?[]const u8 = switch (target.node_test) {
        .name => |n| if (std.mem.eql(u8, n, "*")) null else n,
        .type_test => return null,
    };

    const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null;

    // `[@id='']` matches any element carrying an empty id attribute in a
    // tree walk, but an id lookup is not expected to index empty ids.
    // Let the general path answer instead of reporting "no match".
    if (id_value.len == 0) return null;

    // Resolve search root the same way the general path does.
    const search_root: *Node = if (path.absolute) blk: {
        if (ctx._type == .document) break :blk ctx;
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        break :blk owner.asNode();
    } else ctx;

    const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse {
        return Result.Result{ .node_set = &.{} };
    };
    const id_node = id_element.asNode();

    // The final step is on the child axis, so the search root itself can
    // never be selected: `.//*[@id='x']` evaluated on the #x element must
    // not return that element. A duplicate id further down the subtree
    // could still match, so hand this rare case to the general path
    // rather than guessing.
    if (search_root == id_node) return null;

    // Relative paths must filter to descendants of the context.
    // getElementByIdFromNode is doc-wide.
    if (!search_root.contains(id_node)) {
        return Result.Result{ .node_set = &.{} };
    }

    // Tag check (case-insensitive per decision #2). Element tag names
    // are stored lowercase via `getTagNameLower`, so an ASCII
    // case-insensitive compare against the AST name is equivalent to
    // lowercasing the AST name first, without the allocation.
    if (tag_name) |tag| {
        if (!std.ascii.eqlIgnoreCase(tag, id_element.getTagNameLower())) {
            return Result.Result{ .node_set = &.{} };
        }
    }

    const out = try self.arena.alloc(*Node, 1);
    out[0] = id_node;
    return Result.Result{ .node_set = out };
}
|
||||
|
||||
// True only for a bare `descendant-or-self::node()` step: that exact axis,
// the `node()` type test, and no predicates.
fn isDescendantOrSelfNode(s: Ast.Step) bool {
    const bare_axis = s.axis == .descendant_or_self and s.predicates.len == 0;
    if (!bare_axis) return false;

    switch (s.node_test) {
        .name => return false,
        .type_test => |kind| return kind == .node,
    }
}
|
||||
|
||||
// True only for a bare `self::node()` step: the self axis, the `node()`
// type test, and no predicates.
fn isSelfNode(s: Ast.Step) bool {
    if (s.axis != .self or s.predicates.len != 0) return false;

    return switch (s.node_test) {
        .name => false,
        .type_test => |kind| kind == .node,
    };
}
|
||||
|
||||
// If `expr` is `@attr_name = 'literal'` (or the mirrored
// `'literal' = @attr_name`), returns the literal's text; otherwise null.
fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 {
    const cmp = switch (expr.*) {
        .binop => |b| b,
        else => return null,
    };
    if (cmp.op != .eq) return null;

    const lhs = cmp.left;
    const rhs = cmp.right;

    // Attribute on the left, literal on the right.
    if (rhs.* == .literal and isAttrPath(lhs, attr_name)) return rhs.literal;
    // Literal on the left, attribute on the right.
    if (lhs.* == .literal and isAttrPath(rhs, attr_name)) return lhs.literal;

    return null;
}
|
||||
|
||||
// True when `expr` is a relative, single-step path on the attribute axis
// whose name test equals `attr_name` and which carries no predicates,
// i.e. a plain `@attr_name`.
fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool {
    const candidate = switch (expr.*) {
        .path => |p| p,
        else => return false,
    };
    if (candidate.absolute or candidate.steps.len != 1) return false;

    const step = candidate.steps[0];
    if (step.axis != .attribute or step.predicates.len != 0) return false;

    return switch (step.node_test) {
        .type_test => false,
        .name => |name| std.mem.eql(u8, name, attr_name),
    };
}
|
||||
|
||||
fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result {
|
||||
const base = try self.evalExpr(fp.filter, ctx, pos, size);
|
||||
if (base != .node_set) return base;
|
||||
|
||||
Reference in New Issue
Block a user