mirror of
https://github.com/lightpanda-io/browser.git
synced 2026-06-11 09:35:59 -04:00
xpath: extend fast path to non-positional descendant queries
Generalizes 8733e33b's //tag[@id='x'] shape: tryFusedDescendantFastPath handles any //tag[safe] or .//tag[safe] where the predicates are non-positional boolean/node-set checks. It walks the search root's descendants once in document order and applies the node test and predicates inline, with no per-step materialization and no dedup. 5-9x faster on //div, //*, //*[@class='x'], //div[contains(...)]; ~25x faster on (//div)[1] and count(//div), where the inner path has this shape. The safety gate rejects predicates that could produce a number at the top level (number, neg, arithmetic binop, numeric-returning fn-call) and any predicate containing position()/last() anywhere. Conservative: a nested sub-path's local positional predicate is rejected even though it's scoped to its own axis.
This commit is contained in:
@@ -109,6 +109,7 @@ fn evalExpr(self: *Evaluator, expr: *const Ast.Expr, ctx: *Node, pos: usize, siz
|
||||
|
||||
fn evalPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!Result.Result {
|
||||
if (try self.tryIdLookupFastPath(path, ctx)) |result| return result;
|
||||
if (try self.tryFusedDescendantFastPath(path, ctx)) |result| return result;
|
||||
|
||||
const start: *Node = if (path.absolute) blk: {
|
||||
if (ctx._type == .document) break :blk ctx;
|
||||
@@ -146,18 +147,7 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu
|
||||
// Two acceptable AST shapes:
|
||||
// //tag[@id='x'] parses to: ds::node() / child::tag[pred]
|
||||
// .//tag[@id='x'] parses to: self::node() / ds::node() / child::tag[pred]
|
||||
const target: Ast.Step = switch (path.steps.len) {
|
||||
2 => blk: {
|
||||
if (!isDescendantOrSelfNode(path.steps[0])) return null;
|
||||
break :blk path.steps[1];
|
||||
},
|
||||
3 => blk: {
|
||||
if (!isSelfNode(path.steps[0])) return null;
|
||||
if (!isDescendantOrSelfNode(path.steps[1])) return null;
|
||||
break :blk path.steps[2];
|
||||
},
|
||||
else => return null,
|
||||
};
|
||||
const target = matchDescendantPathShape(path) orelse return null;
|
||||
|
||||
if (target.axis != .child) return null;
|
||||
if (target.predicates.len != 1) return null;
|
||||
@@ -205,6 +195,150 @@ fn tryIdLookupFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Resu
|
||||
return Result.Result{ .node_set = out };
|
||||
}
|
||||
|
||||
// Fast path for `//<test>[preds]` and `.//<test>[preds]` when every
// predicate is "safe" (see `isSafeNonPositionalPredicate`). It accepts
// the same AST shape as `tryIdLookupFastPath`, but instead of an ID
// lookup it performs one document-order walk over the descendants of the
// search root, checking the node test and the predicates on each node as
// it is visited.
//
// Compared with the general path this avoids the per-step axis
// materialization, the per-step `filtered`/`current` ArrayLists, and the
// dedup hash map: a forward walk from a single context node already
// yields nodes in document order.
//
// Typical queries served here: `//div`, `//*`, `//*[@class='x']`,
// `//div[@class='x']`, `//div[contains(@class,'x')]`.
//
// A predicate is "safe" when it is not numeric at the top level (number,
// neg, arithmetic binop, or a fn-call returning a number) and contains no
// `position()`/`last()` call anywhere in its AST. Numeric predicates need
// a real context position, which the fused walk does not track. A
// `position()`/`last()` inside a nested sub-path's own step is rejected
// too, conservatively, even though it is local to that sub-axis.
//
// Returns null (caller falls back to the general evaluator) when the path
// shape does not match, the target step is not on the child axis, any
// predicate is unsafe, or an absolute path is evaluated from a
// non-document context that has no owner document.
fn tryFusedDescendantFastPath(self: *Evaluator, path: Ast.Path, ctx: *Node) Error!?Result.Result {
    const step = matchDescendantPathShape(path) orelse return null;
    if (step.axis != .child) return null;

    for (step.predicates) |predicate| {
        if (!isSafeNonPositionalPredicate(predicate)) return null;
    }

    // A null name means "no name filter": either the `*` wildcard or a
    // type test. Otherwise the name is lower-cased once, up front.
    var lowered_name: ?[]const u8 = null;
    switch (step.node_test) {
        .name => |name| {
            if (!std.mem.eql(u8, name, "*")) {
                lowered_name = try std.ascii.allocLowerString(self.arena, name);
            }
        },
        .type_test => {},
    }

    // Absolute paths start at the document; relative ones at the context.
    var search_root: *Node = ctx;
    if (path.absolute and ctx._type != .document) {
        const owner = ctx.ownerDocument(self.frame) orelse return null;
        search_root = owner.asNode();
    }

    var matches: std.ArrayList(*Node) = .empty;
    try self.fusedDescend(search_root, step, lowered_name, &matches);
    return Result.Result{ .node_set = matches.items };
}
|
||||
|
||||
// Recursive pre-order walk used by `tryFusedDescendantFastPath`.
//
// For each child of `parent`: if it passes the node test and every
// predicate of `target`, it is appended to `out`; then the walk recurses
// into that child regardless of whether it matched. Appending before
// recursing keeps `out` in document order.
fn fusedDescend(
    self: *Evaluator,
    parent: *Node,
    target: Ast.Step,
    lowered_name: ?[]const u8,
    out: *std.ArrayList(*Node),
) Error!void {
    var children = parent.childrenIterator();
    while (children.next()) |child| {
        if (matchTest(child, target.node_test, target.axis, lowered_name)) {
            // Position and size are passed as a synthetic 1/1. This is
            // only sound because the caller's safety gate has already
            // rejected predicates that depend on either value.
            const keep = for (target.predicates) |predicate| {
                const value = try self.evalExpr(predicate, child, 1, 1);
                if (!Result.toBoolean(value)) break false;
            } else true;

            if (keep) try out.append(self.arena, child);
        }
        try self.fusedDescend(child, target, lowered_name, out);
    }
}
|
||||
|
||||
// Recognizes the two step layouts shared by the descendant fast paths and
// returns the final (target) step, or null when the path has any other
// shape:
//   2 steps: descendant-or-self node step, then the target
//   3 steps: self node step, descendant-or-self node step, then the target
fn matchDescendantPathShape(path: Ast.Path) ?Ast.Step {
    const steps = path.steps;

    if (steps.len == 2) {
        return if (isDescendantOrSelfNode(steps[0])) steps[1] else null;
    }

    if (steps.len == 3) {
        if (isSelfNode(steps[0]) and isDescendantOrSelfNode(steps[1])) return steps[2];
        return null;
    }

    return null;
}
|
||||
|
||||
// A predicate is safe for the fused walk when it is not numeric at the
// top level and does not reference `position()`/`last()` anywhere.
fn isSafeNonPositionalPredicate(expr: *const Ast.Expr) bool {
    return !isNumericTopLevel(expr) and !containsPositionOrLast(expr);
}
|
||||
|
||||
// Reports whether a predicate expression may evaluate to a number at its
// top level. XPath treats a numeric predicate `[n]` as
// `[position() = n]`, so such predicates depend on the context position
// and must not be evaluated by a walk that does not track it.
//
// Returns true for number literals, negation, arithmetic binary
// operators, calls to number-returning functions (see `isNumericFnName`),
// and variable references. Everything else returns false.
fn isNumericTopLevel(expr: *const Ast.Expr) bool {
    return switch (expr.*) {
        .number, .neg => true,
        // A variable's type is only known at evaluation time. If it holds
        // a number the predicate is positional, so be conservative and
        // treat every top-level variable reference as possibly numeric.
        .var_ref => true,
        .binop => |bo| switch (bo.op) {
            .add, .sub, .mul, .div, .mod => true,
            else => false,
        },
        .fn_call => |fc| isNumericFnName(fc.name),
        else => false,
    };
}
|
||||
|
||||
// Reports whether `name` is one of the function names this file treats as
// returning a number. The match is exact and case-sensitive.
fn isNumericFnName(name: []const u8) bool {
    const numeric_functions = std.StaticStringMap(void).initComptime(.{
        .{"position"},
        .{"last"},
        .{"count"},
        .{"sum"},
        .{"floor"},
        .{"ceiling"},
        .{"round"},
        .{"number"},
        .{"string-length"},
    });
    return numeric_functions.has(name);
}
|
||||
|
||||
// Recursively searches an expression for a call to `position()` or
// `last()`. Operands, filter expressions and their predicates, function
// arguments, and the predicates of every step in nested paths are all
// searched.
fn containsPositionOrLast(expr: *const Ast.Expr) bool {
    switch (expr.*) {
        // Leaves: nothing to descend into.
        .number, .literal, .var_ref => return false,
        .neg => |operand| return containsPositionOrLast(operand),
        .binop => |bo| {
            if (containsPositionOrLast(bo.left)) return true;
            return containsPositionOrLast(bo.right);
        },
        .filter => |f| {
            if (containsPositionOrLast(f.expr)) return true;
            return containsPositionOrLast(f.predicate);
        },
        .filter_path => |fp| {
            if (containsPositionOrLast(fp.filter)) return true;
            return stepsContainPositionOrLast(fp.steps);
        },
        .path => |p| return stepsContainPositionOrLast(p.steps),
        .fn_call => |call| {
            if (std.mem.eql(u8, call.name, "position")) return true;
            if (std.mem.eql(u8, call.name, "last")) return true;
            return argsContainPositionOrLast(call.args);
        },
    }
}
|
||||
|
||||
// True when any predicate attached to any of `steps` contains a
// `position()`/`last()` call.
fn stepsContainPositionOrLast(steps: []const Ast.Step) bool {
    return for (steps) |step| {
        const found = for (step.predicates) |predicate| {
            if (containsPositionOrLast(predicate)) break true;
        } else false;
        if (found) break true;
    } else false;
}
|
||||
|
||||
// True when any function-call argument contains a `position()`/`last()`
// call.
fn argsContainPositionOrLast(args: []const *Ast.Expr) bool {
    return for (args) |arg| {
        if (containsPositionOrLast(arg)) break true;
    } else false;
}
|
||||
|
||||
fn isDescendantOrSelfNode(s: Ast.Step) bool {
|
||||
if (s.axis != .descendant_or_self) return false;
|
||||
if (s.predicates.len != 0) return false;
|
||||
|
||||
Reference in New Issue
Block a user