From 57c3094e2d14297cd63f24098da1fc3e95ce0086 Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Fri, 22 May 2026 01:18:22 +0200 Subject: [PATCH] fix(pacquet/resolving-npm-resolver): scope picked-manifest cache key by registry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `PickedManifestCache` is shared across the npm (default + JSR) and named-registry resolvers, but its key was `{name}@{version}` only. Two registries can serve different artifacts under the same `name@version` — a public + private collision, or a fork — so a collision would hand the second resolver the first resolver's manifest, breaking the downstream dependency graph / peer extraction / lockfile metadata. Scope the key by registry (`{registry}\x00{name}@{version}`), matching the existing `PackageMetaCache` shape, and add a regression test that points two resolvers at different mock registries (each serving distinct `dependencies` for `acme@1.0.0`) through one shared cache. --- .../src/named_registry_resolver.rs | 1 + .../src/npm_resolver.rs | 16 ++- .../src/npm_resolver/tests.rs | 126 ++++++++++++++++++ .../src/pick_package.rs | 23 ++-- 4 files changed, 155 insertions(+), 11 deletions(-) diff --git a/pacquet/crates/resolving-npm-resolver/src/named_registry_resolver.rs b/pacquet/crates/resolving-npm-resolver/src/named_registry_resolver.rs index 4dcb527d74..2a732782b4 100644 --- a/pacquet/crates/resolving-npm-resolver/src/named_registry_resolver.rs +++ b/pacquet/crates/resolving-npm-resolver/src/named_registry_resolver.rs @@ -155,6 +155,7 @@ impl NamedRegistryResolver { NAMED_REGISTRY_RESOLVED_VIA, opts.published_by, opts.published_by_exclude.as_ref(), + registry, &self.picked_manifest_cache, )?; diff --git a/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs b/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs index 3d5ec27dfd..40d73451e1 100644 --- a/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs +++ b/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs @@ -228,6 +228,7 @@ impl NpmResolver { NPM_REGISTRY_RESOLVED_VIA, opts.published_by, opts.published_by_exclude.as_ref(), + ®istry, &self.picked_manifest_cache, )?; @@ -275,6 +276,7 @@ impl NpmResolver { JSR_REGISTRY_RESOLVED_VIA, opts.published_by, opts.published_by_exclude.as_ref(), + registry, &self.picked_manifest_cache, )?; @@ -394,6 +396,7 @@ pub(crate) fn build_resolve_result( resolved_via: &str, published_by: Option>, published_by_exclude: Option<&PackageVersionPolicy>, + registry: &str, picked_manifest_cache: &crate::PickedManifestCache, ) -> Result { let pkg_name = @@ -417,9 +420,16 @@ pub(crate) fn build_resolve_result( }); let published_at = meta.published_at(&version_str).map(str::to_string); // Dedupe `serde_json::to_value(picked)` across picks of the - // same `(pkg_name, version)` pair — see [`PickedManifestCache`] - // for the rationale. - let manifest_cache_key = format!("{}@{version_str}", picked.name); + // same `(registry, pkg_name, version)` triple — see + // [`PickedManifestCache`] for the rationale. The cache is shared + // across the npm / JSR / named-registry resolvers, so the key + // has to scope by `registry` too; two registries may serve + // different artifacts under the same `name@version`, and + // collapsing them would hand the second registry's resolver + // the first registry's manifest — wrong dependency graph, + // wrong peers, wrong lockfile metadata. Matches `meta_cache`'s + // `{registry}\x00{name}` scoping shape. + let manifest_cache_key = format!("{registry}\x00{}@{version_str}", picked.name); let manifest = match picked_manifest_cache.get(&manifest_cache_key) { Some(cached) => Some(Arc::clone(cached.value())), None => { diff --git a/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs b/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs index 59273946ff..d2ae4cf11b 100644 --- a/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs +++ b/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs @@ -341,3 +341,129 @@ async fn jsr_specifier_with_invalid_scope_propagates_parser_error() { // so we can't downcast to the variant directly. assert_eq!(msg, "Package names from JSR must have a scope", "unexpected error message: {msg}"); } + +/// Two NpmResolvers pointing at different registries, sharing the +/// same `picked_manifest_cache`, must not hand each other the +/// other's manifest when both happen to pick `acme@1.0.0`. Two +/// registries can serve different artifacts under the same +/// `name@version` (a public + private package collision, or a +/// fork), and collapsing the cache key to `name@version` alone +/// would propagate one registry's manifest into the other +/// resolver's `ResolveResult`, breaking the downstream dependency +/// graph / peer extraction / lockfile metadata. +/// +/// The fixture for each registry serves a payload that differs by +/// `dependencies`, so the cache leak shows up as the second +/// resolver's `manifest.dependencies` being the *first* registry's +/// when the bug is present. With the registry-scoped key in place +/// each resolver gets its own manifest. +#[tokio::test] +async fn shared_manifest_cache_does_not_leak_across_registries() { + fn body_with_dep(dep_name: &str, dep_range: &str) -> String { + format!( + r#"{{ + "name": "acme", + "dist-tags": {{ "latest": "1.0.0" }}, + "modified": "2025-01-15T12:00:00.000Z", + "time": {{ "1.0.0": "2024-01-10T08:30:00.000Z" }}, + "versions": {{ + "1.0.0": {{ + "name": "acme", + "version": "1.0.0", + "dependencies": {{ "{dep_name}": "{dep_range}" }}, + "dist": {{ + "integrity": "sha512-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", + "shasum": "0000000000000000000000000000000000000000", + "tarball": "https://registry/acme-1.0.0.tgz" + }} + }} + }} + }}"#, + ) + } + + let mut server_a = mockito::Server::new_async().await; + let _mock_a = server_a + .mock("GET", "/acme") + .with_status(200) + .with_body(body_with_dep("left-pad", "^1.0.0")) + .create_async() + .await; + let mut server_b = mockito::Server::new_async().await; + let _mock_b = server_b + .mock("GET", "/acme") + .with_status(200) + .with_body(body_with_dep("right-pad", "^2.0.0")) + .create_async() + .await; + + // Shared cache — the leak path. The fix is the cache key + // including the registry; without it, whichever resolver runs + // second would return the other's manifest. + let shared_picked_cache = shared_picked_manifest_cache(); + let shared_fetch_locker = shared_packument_fetch_locker(); + + let make_resolver = |registry: String| -> (NpmResolver, TempDir) { + let mut registries = HashMap::new(); + registries.insert("default".to_string(), registry); + let cache_dir = TempDir::new().expect("tempdir"); + let resolver = NpmResolver { + registries, + named_registries: HashMap::new(), + http_client: Arc::new(ThrottledClient::default()), + auth_headers: Arc::new(AuthHeaders::default()), + meta_cache: Arc::new(InMemoryPackageMetaCache::default()), + fetch_locker: Arc::clone(&shared_fetch_locker), + picked_manifest_cache: Arc::clone(&shared_picked_cache), + cache_dir: Some(cache_dir.path().to_path_buf()), + offline: false, + prefer_offline: false, + ignore_missing_time_field: false, + full_metadata: false, + }; + (resolver, cache_dir) + }; + + let (resolver_a, _cache_dir_a) = make_resolver(format!("{}/", server_a.url())); + let (resolver_b, _cache_dir_b) = make_resolver(format!("{}/", server_b.url())); + + let wanted = WantedDependency { + alias: Some("acme".to_string()), + bare_specifier: Some("1.0.0".to_string()), + ..WantedDependency::default() + }; + + let result_a = resolver_a + .resolve(&wanted, &ResolveOptions::default()) + .await + .expect("resolver A") + .expect("resolver A picks"); + let result_b = resolver_b + .resolve(&wanted, &ResolveOptions::default()) + .await + .expect("resolver B") + .expect("resolver B picks"); + + let deps_a = result_a + .manifest + .as_ref() + .and_then(|m| m.get("dependencies")) + .and_then(|d| d.as_object()) + .expect("resolver A manifest carries dependencies"); + let deps_b = result_b + .manifest + .as_ref() + .and_then(|m| m.get("dependencies")) + .and_then(|d| d.as_object()) + .expect("resolver B manifest carries dependencies"); + + assert!(deps_a.contains_key("left-pad"), "resolver A keeps its own manifest: {deps_a:?}"); + assert!( + deps_b.contains_key("right-pad"), + "resolver B got its own manifest, not resolver A's: {deps_b:?}", + ); + assert!( + !deps_b.contains_key("left-pad"), + "resolver B must not see resolver A's `left-pad`: {deps_b:?}", + ); +} diff --git a/pacquet/crates/resolving-npm-resolver/src/pick_package.rs b/pacquet/crates/resolving-npm-resolver/src/pick_package.rs index 8f5284062f..a3d1286659 100644 --- a/pacquet/crates/resolving-npm-resolver/src/pick_package.rs +++ b/pacquet/crates/resolving-npm-resolver/src/pick_package.rs @@ -137,20 +137,27 @@ pub fn shared_packument_fetch_locker() -> PackumentFetchLocker { Arc::new(DashMap::new()) } -/// Per-`(pkg_name, version)` cache for the resolver's serialized -/// `manifest` JSON. The npm resolver builds +/// Per-`(registry, pkg_name, version)` cache for the resolver's +/// serialized `manifest` JSON. The npm resolver builds /// [`pacquet_resolving_resolver_base::ResolveResult`]'s `manifest` /// field via `serde_json::to_value(picked)`; when many resolves /// pick the same version of the same package (the common case for /// shared deps like `react`, `lodash`, ...) every duplicate would /// otherwise re-walk and re-allocate the same JSON tree. Cache the -/// `Arc` once per `(pkg_name, version)` pair so the second -/// pick onwards is an `Arc::clone` instead of a full reserialise. +/// `Arc` once per `(registry, pkg_name, version)` triple so +/// the second pick onwards is an `Arc::clone` instead of a full +/// reserialise. /// -/// Shared across [`crate::NpmResolver`] and -/// [`crate::NamedRegistryResolver`] for the same reasons -/// [`PackumentFetchLocker`] is — both resolvers can pick the same -/// `(pkg_name, version)` pair in one install. +/// Shared across [`crate::NpmResolver`] (default + JSR registries) +/// and [`crate::NamedRegistryResolver`] (`:` specifiers). +/// The key includes `registry` because two registries can serve +/// different artifacts under the same `name@version` — a public +/// `lodash@4.17.21` and a privately-hosted package of the same +/// name-version pair are not interchangeable, and a registry- +/// agnostic key would hand one resolver the other's manifest, +/// breaking the downstream dependency graph / peer extraction / +/// lockfile metadata. Same `{registry}\x00…` scoping shape as +/// [`PackageMetaCache`]. pub type PickedManifestCache = Arc>>; /// Construct a fresh [`PickedManifestCache`] for a new install.