From ce722c1f6e4649130b4d73dec1994fcfe0cf7452 Mon Sep 17 00:00:00 2001 From: Navid EMAD Date: Wed, 6 May 2026 19:41:53 +0200 Subject: [PATCH] xpath: extend fast path to non-positional descendant queries Generalizes 8733e33b's //tag[@id='x'] shape: tryFusedDescendantFastPath handles any //tag[safe] or .//tag[safe] where the predicates are non-positional boolean/node-set checks. Walks the search root's descendants once in document order, applies node test + predicates inline, no per-step materialization, no dedup. 5-9x on //div, //*, //*[@class='x'], //div[contains(...)]; ~25x on (//div)[1] and count(//div) where the inner path is the shape. Safety gate rejects predicates that could produce a number at the top level (number, neg, arithmetic binop, numeric-returning fn-call) and any predicate containing position()/last() anywhere. Conservative: a nested sub-path's local positional predicate is rejected even though it's scoped to its own axis. --- src/browser/xpath/Evaluator.zig | 158 +++++++++++++++++++++++++++++--- 1 file changed, 146 insertions(+), 12 deletions(-) diff --git a/src/browser/xpath/Evaluator.zig b/src/browser/xpath/Evaluator.zig index 1e4b1b13..4dfcc85c 100644 --- a/src/browser/xpath/Evaluator.zig +++ b/src/browser/xpath/Evaluator.zig @@ -109,6 +109,7 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result { if (try self.tryIdLookupFastPath(path, ctx)) |result| return result; + if (try self.tryFusedDescendantFastPath(path, ctx)) |result| return result; const start: *Node = if (path.absolute) blk: { if (ctx._type == .document) break :blk ctx; @@ -146,18 +147,7 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu // Two acceptable AST shapes: // //tag[@id='x'] parses to: ds::node() / child::tag[pred] // .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred] - const target: Ast.Step = switch 
(path.steps.len) { - 2 => blk: { - if (!isDescendantOrSelfNode(path.steps[0])) return null; - break :blk path.steps[1]; - }, - 3 => blk: { - if (!isSelfNode(path.steps[0])) return null; - if (!isDescendantOrSelfNode(path.steps[1])) return null; - break :blk path.steps[2]; - }, - else => return null, - }; + const target = matchDescendantPathShape(path) orelse return null; if (target.axis != .child) return null; if (target.predicates.len != 1) return null; @@ -205,6 +195,150 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu return Result.Result{ .node_set = out }; } +// Generalization of `tryIdLookupFastPath` to non-ID predicates. Same +// AST shape (`//tag[preds]` / `.//tag[preds]`), but instead of +// dispatching to `getElementByIdFromNode`, walks the descendants of +// the search root once in document order, applying the node test and +// any "safe" non-positional predicates inline. Skips the general path's +// per-step axis materialization, the per-step `filtered`/`current` +// ArrayLists, and the dedup hash map (single-context forward walk +// already preserves doc order). +// +// Hits the bulk of the benchmark's remaining cost: `//div`, `//*`, +// `//*[@class='x']`, `//div[@class='x']`, `//div[contains(@class,'x')]`. +// +// "Safe" predicates: not numeric at the top level (number, neg, +// arithmetic binop, or a fn-call returning a number), and free of +// `position()`/`last()` anywhere in the predicate AST. Numeric predicates +// would need `position()` context which the fused walk doesn't track, +// and a `position()`/`last()` reference inside a sub-path's own step is +// rejected conservatively even though it's local to that sub-axis.
+fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result {
+    // Returning null anywhere below means "shape not handled here"; the
+    // caller (`evalPath`) then continues with the general evaluator.
+    const target = matchDescendantPathShape(path) orelse return null;
+    if (target.axis != .child) return null;
+
+    // Every predicate must pass the safety gate, because the walk below
+    // evaluates predicates with a synthetic position/size of 1.
+    for (target.predicates) |p| {
+        if (!isSafeNonPositionalPredicate(p)) return null;
+    }
+
+    // `*` and node-type tests carry no name to compare; any other name is
+    // lowercased once up front and handed to `matchTest` for every node.
+    const lowered_name: ?[]const u8 = switch (target.node_test) {
+        .name => |n| if (std.mem.eql(u8, n, "*")) null else try std.ascii.allocLowerString(self.arena, n),
+        .type_test => null,
+    };
+
+    // Absolute paths search from the document: `ctx` itself when it is the
+    // document node, otherwise its owner document. A context node without
+    // an owner document is left to the general path.
+    const search_root: *Node = if (path.absolute) blk: {
+        if (ctx._type == .document) break :blk ctx;
+        const owner = ctx.ownerDocument(self.frame) orelse return null;
+        break :blk owner.asNode();
+    } else ctx;
+
+    var out: std.ArrayList(*Node) = .empty;
+    try self.fusedDescend(search_root, target, lowered_name, &out);
+    return Result.Result{ .node_set = out.items };
+}
+
+// Recursive pre-order walk over the subtree below `parent` (`parent` itself
+// is never tested). A child is appended to `out` when it passes `matchTest`
+// for `target` and every one of `target.predicates` evaluates to true. The
+// walk descends into every child, matched or not, so recursion depth
+// follows the depth of the tree.
+fn fusedDescend(
+    self: *Evaluator,
+    parent: *Node,
+    target: Ast.Step,
+    lowered_name: ?[]const u8,
+    out: *std.ArrayList(*Node),
+) Error!void {
+    var it = parent.childrenIterator();
+    while (it.next()) |c| {
+        if (matchTest(c, target.node_test, target.axis, lowered_name)) {
+            var ok = true;
+            for (target.predicates) |pred| {
+                // Position / size are synthetic (1, 1). Safe because the
+                // predicate-safety gate already rejected any expression
+                // that depends on either.
+                const val = try self.evalExpr(pred, c, 1, 1);
+                if (!Result.toBoolean(val)) {
+                    ok = false;
+                    break;
+                }
+            }
+            if (ok) try out.append(self.arena, c);
+        }
+        try self.fusedDescend(c, target, lowered_name, out);
+    }
+}
+
+// Returns the final step of `path` when the path has one of the two shapes
+// the descendant fast paths understand, or null otherwise:
+//   2 steps: a step accepted by `isDescendantOrSelfNode`, then the target
+//   3 steps: a step accepted by `isSelfNode`, a step accepted by
+//            `isDescendantOrSelfNode`, then the target
+// The returned step's axis and predicates are not inspected here.
+fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step {
+    return switch (path.steps.len) {
+        2 => blk: {
+            if (!isDescendantOrSelfNode(path.steps[0])) break :blk null;
+            break :blk path.steps[1];
+        },
+        3 => blk: {
+            if (!isSelfNode(path.steps[0])) break :blk null;
+            if (!isDescendantOrSelfNode(path.steps[1])) break :blk null;
+            break :blk path.steps[2];
+        },
+        else => null,
+    };
+}
+
+// True when `expr` can be evaluated without a real position/size context:
+// it is not (potentially) numeric at the top level and contains no
+// `position()` / `last()` call anywhere `containsPositionOrLast` looks.
+fn isSafeNonPositionalPredicate(expr: *const Ast.Expr) bool {
+    if (isNumericTopLevel(expr)) return false;
+    if (containsPositionOrLast(expr)) return false;
+    return true;
+}
+
+// True when the outermost node of `expr` is, or may be, a number: a number
+// literal, a negation, an arithmetic binop, a call to a number-returning
+// function, or a variable reference. XPath treats a numeric predicate `[n]`
+// as `[position() = n]`, which the fused walk cannot honour.
+fn isNumericTopLevel(expr: *const Ast.Expr) bool {
+    return switch (expr.*) {
+        // A variable's value type cannot be determined from this switch, so
+        // a reference is conservatively treated as possibly numeric and the
+        // query is sent to the general path.
+        .number, .neg, .var_ref => true,
+        .binop => |bo| switch (bo.op) {
+            .add, .sub, .mul, .div, .mod => true,
+            else => false,
+        },
+        .fn_call => |fc| isNumericFnName(fc.name),
+        else => false,
+    };
+}
+
+// True when `name` is one of the function names treated here as returning
+// a number. Comparison is exact and case-sensitive.
+fn isNumericFnName(name: []const u8) bool {
+    const numeric = [_][]const u8{
+        "position", "last", "count", "sum",
+        "floor", "ceiling", "round", "number",
+        "string-length",
+    };
+    for (numeric) |n| {
+        if (std.mem.eql(u8, name, n)) return true;
+    }
+    return false;
+}
+
+// Recursively searches `expr` for a call named `position` or `last`:
+// through negations, both binop operands, a filter's expression and
+// predicate, function-call arguments, and the predicates of path steps.
+// Number, literal and variable-reference nodes are leaves. The switch has
+// no `else`, so a new `Ast.Expr` variant will not compile until it is
+// handled here.
+fn containsPositionOrLast(expr: *const Ast.Expr) bool {
+    return switch (expr.*) {
+        .number, .literal, .var_ref => false,
+        .neg => |inner| containsPositionOrLast(inner),
+        .binop => |bo| containsPositionOrLast(bo.left) or containsPositionOrLast(bo.right),
+        .filter => |f| containsPositionOrLast(f.expr) or containsPositionOrLast(f.predicate),
+        .filter_path => |fp| containsPositionOrLast(fp.filter) or stepsContainPositionOrLast(fp.steps),
+        .path => |p| stepsContainPositionOrLast(p.steps),
+        .fn_call => |fc| std.mem.eql(u8, fc.name, "position") or
+            std.mem.eql(u8, fc.name, "last") or
+            argsContainPositionOrLast(fc.args),
+    };
+}
+
+// True when any predicate of any step in `steps` contains a `position()` or
+// `last()` call. Only step predicates are examined.
+fn stepsContainPositionOrLast(steps: []const Ast.Step) bool {
+    for (steps) |s| {
+        for (s.predicates) |p| {
+            if (containsPositionOrLast(p)) return true;
+        }
+    }
+    return false;
+}
+
+// True when any argument expression in `args` contains a `position()` or
+// `last()` call.
+fn argsContainPositionOrLast(args: []const *Ast.Expr) bool {
+    for (args) |a| {
+        if (containsPositionOrLast(a)) return true;
+    }
+    return false;
+}
+
 fn isDescendantOrSelfNode(s: Ast.Step) bool {
     if (s.axis != .descendant_or_self) return false;
     if (s.predicates.len != 0) return false;