fix(pacquet/resolving-npm-resolver): scope picked-manifest cache key by registry

`PickedManifestCache` is shared across the npm (default + JSR) and
named-registry resolvers, but its key was `{name}@{version}` only.
Two registries can serve different artifacts under the same
`name@version` — a public + private collision, or a fork — so a
collision would hand the second resolver the first resolver's
manifest, breaking the downstream dependency graph / peer
extraction / lockfile metadata.

Scope the key by registry (`{registry}\x00{name}@{version}`), matching
the existing `PackageMetaCache` shape, and add a regression test that
points two resolvers at different mock registries (each serving
distinct `dependencies` for `acme@1.0.0`) through one shared cache.
This commit is contained in:
Zoltan Kochan
2026-05-22 01:18:22 +02:00
parent 0343a47216
commit 57c3094e2d
4 changed files with 155 additions and 11 deletions

View File

@@ -155,6 +155,7 @@ impl<Cache: PackageMetaCache + 'static> NamedRegistryResolver<Cache> {
NAMED_REGISTRY_RESOLVED_VIA,
opts.published_by,
opts.published_by_exclude.as_ref(),
registry,
&self.picked_manifest_cache,
)?;

View File

@@ -228,6 +228,7 @@ impl<Cache: PackageMetaCache + 'static> NpmResolver<Cache> {
NPM_REGISTRY_RESOLVED_VIA,
opts.published_by,
opts.published_by_exclude.as_ref(),
&registry,
&self.picked_manifest_cache,
)?;
@@ -275,6 +276,7 @@ impl<Cache: PackageMetaCache + 'static> NpmResolver<Cache> {
JSR_REGISTRY_RESOLVED_VIA,
opts.published_by,
opts.published_by_exclude.as_ref(),
registry,
&self.picked_manifest_cache,
)?;
@@ -394,6 +396,7 @@ pub(crate) fn build_resolve_result(
resolved_via: &str,
published_by: Option<DateTime<Utc>>,
published_by_exclude: Option<&PackageVersionPolicy>,
registry: &str,
picked_manifest_cache: &crate::PickedManifestCache,
) -> Result<ResolveResult, ResolveError> {
let pkg_name =
@@ -417,9 +420,16 @@ pub(crate) fn build_resolve_result(
});
let published_at = meta.published_at(&version_str).map(str::to_string);
// Dedupe `serde_json::to_value(picked)` across picks of the
// same `(pkg_name, version)` pair — see [`PickedManifestCache`]
// for the rationale.
let manifest_cache_key = format!("{}@{version_str}", picked.name);
// same `(registry, pkg_name, version)` triple — see
// [`PickedManifestCache`] for the rationale. The cache is shared
// across the npm / JSR / named-registry resolvers, so the key
// has to scope by `registry` too; two registries may serve
// different artifacts under the same `name@version`, and
// collapsing them would hand the second registry's resolver
// the first registry's manifest — wrong dependency graph,
// wrong peers, wrong lockfile metadata. Matches `meta_cache`'s
// `{registry}\x00{name}` scoping shape.
let manifest_cache_key = format!("{registry}\x00{}@{version_str}", picked.name);
let manifest = match picked_manifest_cache.get(&manifest_cache_key) {
Some(cached) => Some(Arc::clone(cached.value())),
None => {

View File

@@ -341,3 +341,129 @@ async fn jsr_specifier_with_invalid_scope_propagates_parser_error() {
// so we can't downcast to the variant directly.
assert_eq!(msg, "Package names from JSR must have a scope", "unexpected error message: {msg}");
}
/// Two NpmResolvers pointing at different registries, sharing the
/// same `picked_manifest_cache`, must not hand each other the
/// other's manifest when both happen to pick `acme@1.0.0`. Two
/// registries can serve different artifacts under the same
/// `name@version` (a public + private package collision, or a
/// fork), and collapsing the cache key to `name@version` alone
/// would propagate one registry's manifest into the other
/// resolver's `ResolveResult`, breaking the downstream dependency
/// graph / peer extraction / lockfile metadata.
///
/// The fixture for each registry serves a payload that differs by
/// `dependencies`, so the cache leak shows up as the second
/// resolver's `manifest.dependencies` being the *first* registry's
/// when the bug is present. With the registry-scoped key in place
/// each resolver gets its own manifest.
#[tokio::test]
async fn shared_manifest_cache_does_not_leak_across_registries() {
fn body_with_dep(dep_name: &str, dep_range: &str) -> String {
format!(
r#"{{
"name": "acme",
"dist-tags": {{ "latest": "1.0.0" }},
"modified": "2025-01-15T12:00:00.000Z",
"time": {{ "1.0.0": "2024-01-10T08:30:00.000Z" }},
"versions": {{
"1.0.0": {{
"name": "acme",
"version": "1.0.0",
"dependencies": {{ "{dep_name}": "{dep_range}" }},
"dist": {{
"integrity": "sha512-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==",
"shasum": "0000000000000000000000000000000000000000",
"tarball": "https://registry/acme-1.0.0.tgz"
}}
}}
}}
}}"#,
)
}
let mut server_a = mockito::Server::new_async().await;
let _mock_a = server_a
.mock("GET", "/acme")
.with_status(200)
.with_body(body_with_dep("left-pad", "^1.0.0"))
.create_async()
.await;
let mut server_b = mockito::Server::new_async().await;
let _mock_b = server_b
.mock("GET", "/acme")
.with_status(200)
.with_body(body_with_dep("right-pad", "^2.0.0"))
.create_async()
.await;
// Shared cache — the leak path. The fix is the cache key
// including the registry; without it, whichever resolver runs
// second would return the other's manifest.
let shared_picked_cache = shared_picked_manifest_cache();
let shared_fetch_locker = shared_packument_fetch_locker();
let make_resolver = |registry: String| -> (NpmResolver<InMemoryPackageMetaCache>, TempDir) {
let mut registries = HashMap::new();
registries.insert("default".to_string(), registry);
let cache_dir = TempDir::new().expect("tempdir");
let resolver = NpmResolver {
registries,
named_registries: HashMap::new(),
http_client: Arc::new(ThrottledClient::default()),
auth_headers: Arc::new(AuthHeaders::default()),
meta_cache: Arc::new(InMemoryPackageMetaCache::default()),
fetch_locker: Arc::clone(&shared_fetch_locker),
picked_manifest_cache: Arc::clone(&shared_picked_cache),
cache_dir: Some(cache_dir.path().to_path_buf()),
offline: false,
prefer_offline: false,
ignore_missing_time_field: false,
full_metadata: false,
};
(resolver, cache_dir)
};
let (resolver_a, _cache_dir_a) = make_resolver(format!("{}/", server_a.url()));
let (resolver_b, _cache_dir_b) = make_resolver(format!("{}/", server_b.url()));
let wanted = WantedDependency {
alias: Some("acme".to_string()),
bare_specifier: Some("1.0.0".to_string()),
..WantedDependency::default()
};
let result_a = resolver_a
.resolve(&wanted, &ResolveOptions::default())
.await
.expect("resolver A")
.expect("resolver A picks");
let result_b = resolver_b
.resolve(&wanted, &ResolveOptions::default())
.await
.expect("resolver B")
.expect("resolver B picks");
let deps_a = result_a
.manifest
.as_ref()
.and_then(|m| m.get("dependencies"))
.and_then(|d| d.as_object())
.expect("resolver A manifest carries dependencies");
let deps_b = result_b
.manifest
.as_ref()
.and_then(|m| m.get("dependencies"))
.and_then(|d| d.as_object())
.expect("resolver B manifest carries dependencies");
assert!(deps_a.contains_key("left-pad"), "resolver A keeps its own manifest: {deps_a:?}");
assert!(
deps_b.contains_key("right-pad"),
"resolver B got its own manifest, not resolver A's: {deps_b:?}",
);
assert!(
!deps_b.contains_key("left-pad"),
"resolver B must not see resolver A's `left-pad`: {deps_b:?}",
);
}

View File

@@ -137,20 +137,27 @@ pub fn shared_packument_fetch_locker() -> PackumentFetchLocker {
Arc::new(DashMap::new())
}
/// Per-`(pkg_name, version)` cache for the resolver's serialized
/// `manifest` JSON. The npm resolver builds
/// Per-`(registry, pkg_name, version)` cache for the resolver's
/// serialized `manifest` JSON. The npm resolver builds
/// [`pacquet_resolving_resolver_base::ResolveResult`]'s `manifest`
/// field via `serde_json::to_value(picked)`; when many resolves
/// pick the same version of the same package (the common case for
/// shared deps like `react`, `lodash`, ...) every duplicate would
/// otherwise re-walk and re-allocate the same JSON tree. Cache the
/// `Arc<Value>` once per `(pkg_name, version)` pair so the second
/// pick onwards is an `Arc::clone` instead of a full reserialise.
/// `Arc<Value>` once per `(registry, pkg_name, version)` triple so
/// the second pick onwards is an `Arc::clone` instead of a full
/// reserialise.
///
/// Shared across [`crate::NpmResolver`] and
/// [`crate::NamedRegistryResolver`] for the same reasons
/// [`PackumentFetchLocker`] is — both resolvers can pick the same
/// `(pkg_name, version)` pair in one install.
/// Shared across [`crate::NpmResolver`] (default + JSR registries)
/// and [`crate::NamedRegistryResolver`] (`<alias>:` specifiers).
/// The key includes `registry` because two registries can serve
/// different artifacts under the same `name@version` — a public
/// `lodash@4.17.21` and a privately-hosted package of the same
/// name-version pair are not interchangeable, and a registry-
/// agnostic key would hand one resolver the other's manifest,
/// breaking the downstream dependency graph / peer extraction /
/// lockfile metadata. Same `{registry}\x00…` scoping shape as
/// [`PackageMetaCache`].
pub type PickedManifestCache = Arc<DashMap<String, Arc<serde_json::Value>>>;
/// Construct a fresh [`PickedManifestCache`] for a new install.