From e7820bf650bb3dc18da64a34b31d0f01981a3191 Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Sat, 23 May 2026 12:53:46 +0200 Subject: [PATCH] perf(pacquet): cache per-wanted resolves in resolve_dependency_tree (#11874) * perf(pacquet): cache per-wanted resolves in resolve_dependency_tree Memoise `resolver.resolve()` and `extract_children()` on the tree-walk context. The first visit for a given `(alias, bare_specifier, optional)` wanted dep runs the resolver chain; later visits clone the cached `Arc` and skip `pick_package`'s cache-key formatting, HashMap probing, and semver matching. The first visit for a given `pkgIdWithPatchHash` walks the manifest for child specs; later visits clone the cached `Arc>` and skip the JSON traversal. Per-occurrence `NodeId`s and the cycle-break gate are unchanged, so peer-suffix variants for non-leaf packages stay intact. Mirrors pnpm's `resolveDependencies.ts:1584` `isNew` gate at the wanted-dep edge, which is the layer pacquet's recursion shape can short-circuit without restructuring the tree builder. Refs #11869. * docs(pacquet): unlink cross-crate pick_package references `pick_package` lives in `resolving-npm-resolver`, not in the deps-resolver crate, so the `[`pick_package`]` intra-doc links broke `cargo doc --document-private-items` under `-D warnings`. Drop the link form and keep the bare backticks. --- .../src/resolve_dependency_tree.rs | 138 ++++++++++++++---- 1 file changed, 111 insertions(+), 27 deletions(-) diff --git a/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs b/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs index df5dfa2224..ab86284059 100644 --- a/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs +++ b/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs @@ -166,6 +166,31 @@ pub(crate) fn importer_optional_dependency_names(manifest: &PackageManifest) -> manifest.dependencies([DependencyGroup::Optional]).map(|(name, _)| name.to_string()).collect() } +/// Cache key for [`TreeCtx::resolved_by_wanted`]. +/// +/// The npm-shaped slice pacquet exposes today calls +/// [`Resolver::resolve`] with only three [`WantedDependency`] fields +/// populated — `alias`, `bare_specifier`, and `optional` (see the +/// `WantedDependency` literals in [`extend_tree`] and the recursive +/// arm of [`fn@resolve_node`]). Anything else stays at `Default::default()`, +/// so a tuple over those three fields uniquely identifies a wanted +/// dep across revisits. +/// +/// `optional` is part of the key because the npm resolver's +/// `pick_package` toggles between the abbreviated and full packument +/// based on `wanted.optional` ([`pickPackage.ts:391`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/pickPackage.ts#L201)) — +/// caching by `(alias, bare_specifier)` alone would let an optional +/// caller satisfy itself with a non-optional caller's abbreviated +/// result, losing the `libc`/`cpu`/`os` filter inputs that mode +/// supplies. +type WantedKey = (Option, Option, Option); + +/// One entry in [`TreeCtx::children_specs_by_id`] — +/// `(child_alias, child_range, child_optional)` triples extracted from +/// a resolved package's manifest's `dependencies` plus +/// `optionalDependencies` sections. +type ChildSpec = (String, String, bool); + /// Mutable workspace for an in-flight tree walk. The orchestrator /// (`resolve_importer`) holds one of these across hoist iterations and /// extends it via [`extend_tree`] so newly-hoisted peer dependencies @@ -185,6 +210,30 @@ pub struct TreeCtx { /// matched against at least one resolved package. Mirrors upstream's /// `ctx.appliedPatches`. applied_patches: Mutex>, + /// Memoised [`Resolver::resolve`] results keyed by the parts of + /// [`WantedDependency`] the npm slice actually populates (see + /// [`WantedKey`]). + /// + /// pnpm's `resolveDependencies` carries the equivalent skip via the + /// [`isNew` gate](https://github.com/pnpm/pnpm/blob/097983fbca/installing/deps-resolver/src/resolveDependencies.ts#L1584) — + /// the second time a `pkgIdWithPatchHash` is reached, the walker + /// reuses the existing `resolvedPkgsById` envelope and never + /// re-enters the resolver chain. pacquet shortcuts at the layer + /// above that (the wanted-dep edge) because a `(name, range)` + /// pair that's already been resolved doesn't need + /// `pick_package`'s in-memory packument lookup, cache-key + /// formatting, or semver matching repeated. On the `alotta-files` + /// fixture this collapses the ~3 redundant `resolver.resolve()` + /// calls per package the tree walk used to drive into one. + resolved_by_wanted: + Mutex>>, + /// Cached `extract_children` output keyed by `pkgIdWithPatchHash`. + /// First visit walks the manifest's `dependencies` / + /// `optionalDependencies`; subsequent visits clone the `Arc` and + /// skip the JSON traversal. Paired with [`Self::resolved_by_wanted`] + /// so a revisit's per-call CPU is two HashMap lookups and an + /// `Arc::clone`. + children_specs_by_id: Mutex>>>, } impl TreeCtx { @@ -199,6 +248,8 @@ impl TreeCtx { policy_violations: Mutex::new(Vec::new()), patched_dependencies: None, applied_patches: Mutex::new(HashSet::new()), + resolved_by_wanted: Mutex::new(HashMap::new()), + children_specs_by_id: Mutex::new(HashMap::new()), } } @@ -339,20 +390,44 @@ where { let current_is_optional = wanted.optional.unwrap_or(false) || parent_optional; - let result = resolver - .resolve(&wanted, &ctx.base_opts) - .await - .map_err(|err: ResolveError| ResolveDependencyTreeError::Resolve(err.to_string()))?; - let Some(result) = result else { - return Err(ResolveDependencyTreeError::SpecNotSupported { - specifier: render_specifier(&wanted), - }); + // Memoise the per-wanted resolve. The first caller for a given + // `(alias, bare_specifier, optional)` runs the resolver chain and + // stores the `Arc` on `ctx.resolved_by_wanted`; + // every later caller for the same wanted dep clones the `Arc` and + // skips the chain entirely. Concurrent first-callers can both miss + // the cache and run `resolver.resolve` in parallel — the resolver's + // own per-cache-key semaphore (`pick_package::fetch_locker`) + // already coalesces those into a single network fetch, so the + // doubled work is bounded to in-memory packument lookups + semver + // matching, and the second to finish loses the `insert` race + // harmlessly (the entry holds an `Arc` to an equivalent + // `ResolveResult`). + let cache_key: WantedKey = + (wanted.alias.clone(), wanted.bare_specifier.clone(), wanted.optional); + let cached = lock_recoverable(&ctx.resolved_by_wanted).get(&cache_key).map(Arc::clone); + let result = match cached { + Some(result) => result, + None => { + let result = + resolver.resolve(&wanted, &ctx.base_opts).await.map_err(|err: ResolveError| { + ResolveDependencyTreeError::Resolve(err.to_string()) + })?; + let Some(result) = result else { + return Err(ResolveDependencyTreeError::SpecNotSupported { + specifier: render_specifier(&wanted), + }); + }; + // Wrap in `Arc` once so the cache, the per-id + // `ResolvedPackage` envelope, and the later peer-resolved + // graph node share one heap-allocated `ResolveResult` + // instead of cloning every `String` field per occurrence. + let result = Arc::new(result); + lock_recoverable(&ctx.resolved_by_wanted) + .entry(cache_key) + .or_insert_with(|| Arc::clone(&result)); + result + } }; - // Wrap in `Arc` once so the two store sites below (the per-id - // `ResolvedPackage` envelope and the later peer-resolved graph - // node) share one heap-allocated `ResolveResult` instead of - // cloning every `String` field per occurrence. - let result = Arc::new(result); if let Some(violation) = result.policy_violation.clone() { lock_recoverable(&ctx.policy_violations).push(violation); @@ -433,14 +508,30 @@ where let next_ancestors: Vec = ancestor_ids.iter().cloned().chain(std::iter::once(id.clone())).collect(); - let child_specs = extract_children(&result); + // Look up cached children specs first; only walk the manifest on + // a miss. The cache value is held by `Arc` so revisits clone the + // refcount instead of the inner `Vec<(String, String, bool)>`. + let child_specs = { + let cache = lock_recoverable(&ctx.children_specs_by_id); + cache.get(&id).map(Arc::clone) + }; + let child_specs = match child_specs { + Some(specs) => specs, + None => { + let specs = Arc::new(extract_children(&result)); + lock_recoverable(&ctx.children_specs_by_id) + .entry(id.clone()) + .or_insert_with(|| Arc::clone(&specs)); + specs + } + }; let child_results = child_specs - .into_iter() + .iter() .map(|(child_name, child_range, child_optional)| { let child_wanted = WantedDependency { - alias: Some(child_name), - bare_specifier: Some(child_range), - optional: Some(child_optional), + alias: Some(child_name.clone()), + bare_specifier: Some(child_range.clone()), + optional: Some(*child_optional), ..WantedDependency::default() }; let next_ancestors = next_ancestors.clone(); @@ -593,9 +684,7 @@ fn render_specifier(wanted: &WantedDependency) -> String { /// `optionalDependencies`. The walker propagates this through /// `current_is_optional` so [`ResolvedPackage::optional`] reflects /// whether every path to the node went through an optional edge. -fn extract_children( - result: &pacquet_resolving_resolver_base::ResolveResult, -) -> Vec<(String, String, bool)> { +fn extract_children(result: &pacquet_resolving_resolver_base::ResolveResult) -> Vec { let Some(manifest) = result.manifest.as_ref() else { return Vec::new() }; let mut out = Vec::new(); collect_deps(manifest, "dependencies", false, &mut out); @@ -603,12 +692,7 @@ fn extract_children( out } -fn collect_deps( - manifest: &Value, - key: &str, - optional: bool, - out: &mut Vec<(String, String, bool)>, -) { +fn collect_deps(manifest: &Value, key: &str, optional: bool, out: &mut Vec) { let Some(map) = manifest.get(key).and_then(Value::as_object) else { return }; for (name, range) in map { if let Some(range_str) = range.as_str() {