diff --git a/src/browser/tests/xpath/xpath_perf.html b/src/browser/tests/xpath/xpath_perf.html
new file mode 100644
index 00000000..0d31e052
--- /dev/null
+++ b/src/browser/tests/xpath/xpath_perf.html
@@ -0,0 +1,171 @@
+
+
+
+ XPath perf benchmark
+
+
+
+
+
+
+
+
+
+
diff --git a/src/browser/webapi/XPathResult.zig b/src/browser/webapi/XPathResult.zig
index 44c29b44..2845480e 100644
--- a/src/browser/webapi/XPathResult.zig
+++ b/src/browser/webapi/XPathResult.zig
@@ -281,3 +281,7 @@ test "WebApi: XPathResult" {
test "WebApi: XPath conformance" {
try testing.htmlRunner("xpath/xpath_conformance.html", .{});
}
+
+// Runs the `xpath/xpath_perf.html` page through `testing.htmlRunner`,
+// passing an empty options literal (`.{}`). Any error the runner
+// returns is propagated by `try` and fails this test.
+// NOTE(review): the page is titled "XPath perf benchmark"; whether it
+// asserts on timings or only on results is not visible from here.
+test "WebApi: XPath perf" {
+ try testing.htmlRunner("xpath/xpath_perf.html", .{});
+}
diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig
index d654ed8f..1e4b1b13 100644
--- a/src/browser/xpath/Evaluator.zig
+++ b/src/browser/xpath/Evaluator.zig
@@ -108,6 +108,8 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz
}
fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result {
+ if (try self.tryIdLookupFastPath(path, ctx)) |result| return result;
+
const start: *Node = if (path.absolute) blk: {
if (ctx._type == .document) break :blk ctx;
const owner = ctx.ownerDocument(self.frame) orelse break :blk ctx;
@@ -125,6 +127,125 @@ fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result {
return .{ .node_set = current_set };
}
+// Recognize the very common `//tag[@id='x']` and `.//tag[@id='x']`
+// shapes (and their wildcard `//*[@id='x']` variants) and serve them
+// directly from `frame.getElementByIdFromNode`. Accepts the literal on
+// either side of `=`.
+//
+// Returns:
+//   - `null` when the fast path does not apply and the caller must run
+//     the general evaluator: unrecognized shape (extra steps, extra
+//     predicates, non-eq predicate, non-literal operand, type test), no
+//     resolvable search root, or the id lookup landing on the search
+//     root itself.
+//   - an empty node-set when the id is unknown, the element found lies
+//     outside the search root, or its tag name does not match.
+//   - a one-element node-set allocated from `self.arena` otherwise.
+//
+// Mirrors the same tradeoff `webapi/selector/List.zig:optimizeSelector`
+// already makes for `querySelector(All)`: the id-map only stores the
+// first element per ID in document order, so duplicate IDs (invalid
+// HTML, but possible) yield at most one match here where a strict tree
+// walk would find all, and yield none when that first element is
+// filtered out by the containment or tag check below. Acceptable
+// because Capybara/Selenium hot paths assume unique IDs and CSS has
+// shipped this compromise for years.
+fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result {
+    // Two acceptable AST shapes:
+    //   //tag[@id='x']   parses to: ds::node() / child::tag[pred]
+    //   .//tag[@id='x']  parses to: self::node() / ds::node() / child::tag[pred]
+    const target: Ast.Step = switch (path.steps.len) {
+        2 => blk: {
+            if (!isDescendantOrSelfNode(path.steps[0])) return null;
+            break :blk path.steps[1];
+        },
+        3 => blk: {
+            if (!isSelfNode(path.steps[0])) return null;
+            if (!isDescendantOrSelfNode(path.steps[1])) return null;
+            break :blk path.steps[2];
+        },
+        else => return null,
+    };
+
+    if (target.axis != .child) return null;
+    if (target.predicates.len != 1) return null;
+
+    // Tag name (null = wildcard "*"). type_test (e.g. `node()`,
+    // `text()`) doesn't qualify because getElementByIdFromNode only
+    // returns elements.
+    const tag_name: ?[]const u8 = switch (target.node_test) {
+        .name => |n| if (std.mem.eql(u8, n, "*")) null else n,
+        .type_test => return null,
+    };
+
+    const id_value = matchAttrEqLiteral(target.predicates[0], "id") orelse return null;
+
+    // Resolve search root the same way the general path does, except
+    // that a missing owner document abandons the fast path instead of
+    // falling back to `ctx`.
+    const search_root: *Node = if (path.absolute) blk: {
+        if (ctx._type == .document) break :blk ctx;
+        const owner = ctx.ownerDocument(self.frame) orelse return null;
+        break :blk owner.asNode();
+    } else ctx;
+
+    const id_element = self.frame.getElementByIdFromNode(search_root, id_value) orelse {
+        return Result.Result{ .node_set = &.{} };
+    };
+    const id_node = id_element.asNode();
+
+    // `descendant-or-self::node()/child::...` only ever selects proper
+    // descendants of the search root, never the root itself. When the
+    // lookup lands on the root (e.g. `.//*[@id='x']` evaluated on the
+    // element that carries id `x`), the id-map cannot tell us whether a
+    // descendant shares the id, so let the general path decide.
+    if (search_root == id_node) return null;
+
+    // Relative paths must filter to descendants of the context.
+    // getElementByIdFromNode is doc-wide.
+    if (!search_root.contains(id_node)) {
+        return Result.Result{ .node_set = &.{} };
+    }
+
+    // Tag check (case-insensitive per decision #2). Element tag names
+    // are stored lowercase via `getTagNameLower`, so an ASCII
+    // case-insensitive comparison against the AST name is equivalent
+    // to lowercasing the AST name first, without allocating.
+    if (tag_name) |tag| {
+        if (!std.ascii.eqlIgnoreCase(tag, id_element.getTagNameLower())) {
+            return Result.Result{ .node_set = &.{} };
+        }
+    }
+
+    const out = try self.arena.alloc(*Node, 1);
+    out[0] = id_node;
+    return Result.Result{ .node_set = out };
+}
+
+// Reports whether `s` is exactly the step `descendant-or-self::node()`:
+// descendant-or-self axis, a `node()` type test, and no predicates.
+// Any name test on that axis is rejected.
+fn isDescendantOrSelfNode(s: Ast.Step) bool {
+    const bare_axis = s.axis == .descendant_or_self and s.predicates.len == 0;
+    switch (s.node_test) {
+        .type_test => |node_kind| return bare_axis and node_kind == .node,
+        .name => return false,
+    }
+}
+
+// Reports whether `s` is exactly the step `self::node()`: self axis,
+// a `node()` type test, and no predicates. Name tests never qualify.
+fn isSelfNode(s: Ast.Step) bool {
+    if (s.axis != .self or s.predicates.len != 0) return false;
+    return switch (s.node_test) {
+        .name => false,
+        .type_test => |node_kind| node_kind == .node,
+    };
+}
+
+// Matches predicates of the form `@attr_name = 'literal'` or
+// `'literal' = @attr_name` and returns the literal's text. Returns
+// `null` for anything else: a non-binop expression, an operator other
+// than `.eq`, or operands that are not one attribute path plus one
+// literal.
+fn matchAttrEqLiteral(expr: *const Ast.Expr, attr_name: []const u8) ?[]const u8 {
+    const comparison = switch (expr.*) {
+        .binop => |binop| binop,
+        else => return null,
+    };
+    if (comparison.op != .eq) return null;
+
+    // Attribute on the left: the right operand must be the literal.
+    // (If it is not, the mirrored form cannot match either, because
+    // the left operand is a path rather than a literal.)
+    if (isAttrPath(comparison.left, attr_name)) {
+        return switch (comparison.right.*) {
+            .literal => |literal_text| literal_text,
+            else => null,
+        };
+    }
+
+    // Mirrored form: attribute on the right, literal on the left.
+    if (isAttrPath(comparison.right, attr_name)) {
+        return switch (comparison.left.*) {
+            .literal => |literal_text| literal_text,
+            else => null,
+        };
+    }
+
+    return null;
+}
+
+// Reports whether `expr` is the relative, single-step path
+// `attribute::attr_name` (i.e. `@attr_name`) with no predicates. The
+// attribute name is compared byte-for-byte against `attr_name`.
+fn isAttrPath(expr: *const Ast.Expr, attr_name: []const u8) bool {
+    const attr_path = switch (expr.*) {
+        .path => |inner| inner,
+        else => return false,
+    };
+    if (attr_path.absolute or attr_path.steps.len != 1) return false;
+
+    const only_step = attr_path.steps[0];
+    if (only_step.axis != .attribute or only_step.predicates.len != 0) return false;
+
+    switch (only_step.node_test) {
+        .type_test => return false,
+        .name => |test_name| return std.mem.eql(u8, test_name, attr_name),
+    }
+}
+
fn evalFilterPath(self: *Evaluator, fp: Ast.FilterPath, ctx: *Node, pos: usize, size: usize) Error!Result.Result {
const base = try self.evalExpr(fp.filter, ctx, pos, size);
if (base != .node_set) return base;