diff --git a/.changeset/pick-registry-unscoped-npm-alias.md b/.changeset/pick-registry-unscoped-npm-alias.md new file mode 100644 index 0000000000..37b4a796dd --- /dev/null +++ b/.changeset/pick-registry-unscoped-npm-alias.md @@ -0,0 +1,6 @@ +--- +"@pnpm/config.pick-registry-for-package": patch +"pnpm": patch +--- + +Fix `pickRegistryForPackage` returning the wrong registry for an unscoped `npm:` alias under a scoped local name. A manifest entry like `"@private/foo": "npm:lodash@^1"` was routing the `lodash` fetch through `registries["@private"]`, even though `lodash` is unscoped and doesn't live on that registry. The npm-alias branch now returns the alias target's own scope (or `null` for an unscoped target, falling through to `registries.default`) instead of leaking into the local key's scope. diff --git a/Cargo.lock b/Cargo.lock index 12f4694c44..b20e8e60ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2258,6 +2258,7 @@ name = "pacquet-package-manager" version = "0.0.1" dependencies = [ "async-recursion", + "chrono", "dashmap", "derive_more", "dunce", @@ -2285,6 +2286,7 @@ dependencies = [ "pacquet-registry", "pacquet-registry-mock", "pacquet-reporter", + "pacquet-resolving-deps-resolver", "pacquet-resolving-npm-resolver", "pacquet-resolving-resolver-base", "pacquet-store-dir", @@ -2408,6 +2410,25 @@ dependencies = [ "tokio", ] +[[package]] +name = "pacquet-resolving-deps-resolver" +version = "0.0.1" +dependencies = [ + "async-recursion", + "derive_more", + "futures-util", + "miette 7.6.0", + "node-semver", + "pacquet-lockfile", + "pacquet-package-manifest", + "pacquet-resolving-resolver-base", + "pipe-trait", + "pretty_assertions", + "serde_json", + "tempfile", + "tokio", +] + [[package]] name = "pacquet-resolving-npm-resolver" version = "0.0.1" @@ -2444,6 +2465,8 @@ version = "0.0.1" name = "pacquet-resolving-resolver-base" version = "0.0.1" dependencies = [ + "chrono", + "pacquet-config", "pacquet-lockfile", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 016b193916..665c532964 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -37,6 +37,7 @@ pacquet-reporter = { path = "pacquet/crates/reporter" } pacquet-patching = { path = "pacquet/crates/patching" } pacquet-real-hoist = { path = "pacquet/crates/real-hoist" } pacquet-resolving-default-resolver = { path = "pacquet/crates/resolving-default-resolver" } +pacquet-resolving-deps-resolver = { path = "pacquet/crates/resolving-deps-resolver" } pacquet-resolving-npm-resolver = { path = "pacquet/crates/resolving-npm-resolver" } pacquet-resolving-parse-wanted-dependency = { path = "pacquet/crates/resolving-parse-wanted-dependency" } pacquet-resolving-resolver-base = { path = "pacquet/crates/resolving-resolver-base" } diff --git a/config/pick-registry-for-package/src/index.ts b/config/pick-registry-for-package/src/index.ts index 71cb954f87..e0cfb69d53 100644 --- a/config/pick-registry-for-package/src/index.ts +++ b/config/pick-registry-for-package/src/index.ts @@ -7,10 +7,15 @@ export function pickRegistryForPackage (registries: Registries, packageName: str function getScope (pkgName: string, bareSpecifier?: string): string | null { if (bareSpecifier?.startsWith('npm:')) { - bareSpecifier = bareSpecifier.slice(4) - if (bareSpecifier[0] === '@') { - return bareSpecifier.substring(0, bareSpecifier.indexOf('/')) + const target = bareSpecifier.slice(4) + if (target[0] === '@') { + return target.substring(0, target.indexOf('/')) } + // Unscoped `npm:` alias target (e.g. `"@private/foo": "npm:lodash@^1"`). + // The package being fetched is unscoped, so the local alias's scope must + // not drive registry routing — `lodash` doesn't live on the `@private` + // registry. Fall through to the default registry instead. + return null } if (pkgName[0] === '@') { return pkgName.substring(0, pkgName.indexOf('/')) diff --git a/config/pick-registry-for-package/test/index.spec.ts b/config/pick-registry-for-package/test/index.spec.ts index c82d41bc9d..719af0a209 100644 --- a/config/pick-registry-for-package/test/index.spec.ts +++ b/config/pick-registry-for-package/test/index.spec.ts @@ -10,3 +10,28 @@ test('pick correct scope', () => { expect(pickRegistryForPackage(registries, '@random/lodash')).toBe('https://registry.npmjs.org/') expect(pickRegistryForPackage(registries, '@random/lodash', 'npm:@private/lodash@1')).toBe('https://private.registry.com/') }) + +// An unscoped `npm:` alias target (e.g. `"@private/foo": "npm:lodash@^1"`) +// must NOT route through the local alias's scope: the fetched package is +// `lodash` (unscoped) and doesn't live on the `@private` registry. The npm- +// alias branch returns `null` in that case so the call falls through to +// `registries.default`. +test('unscoped npm-alias target routes to default, not the local alias scope', () => { + const registries = { + default: 'https://registry.npmjs.org/', + '@private': 'https://private.registry.com/', + } + expect(pickRegistryForPackage(registries, '@private/foo', 'npm:lodash@^1')).toBe('https://registry.npmjs.org/') +}) + +// Scoped local + scoped `npm:` target in a different scope: the target's +// scope wins. The package being fetched is `@scope2/bar`, so routing +// follows `@scope2`, not the local `@scope1/` slot. +test('scoped npm-alias target in different scope wins over local scope', () => { + const registries = { + default: 'https://registry.npmjs.org/', + '@scope1': 'https://scope1.registry/', + '@scope2': 'https://scope2.registry/', + } + expect(pickRegistryForPackage(registries, '@scope1/foo', 'npm:@scope2/bar@^1')).toBe('https://scope2.registry/') +}) diff --git a/pacquet/crates/config/src/version_policy.rs b/pacquet/crates/config/src/version_policy.rs index 3330d71e65..fb76491ff5 100644 --- a/pacquet/crates/config/src/version_policy.rs +++ b/pacquet/crates/config/src/version_policy.rs @@ -130,6 +130,7 @@ pub enum PolicyMatch { /// exact version unions (`lodash@4.17.21 || 4.17.22`) — different from /// `allowBuilds`, which lands as a literal set via /// [`expand_package_version_specs`]. +#[derive(Clone)] pub struct PackageVersionPolicy { rules: Vec, } @@ -145,6 +146,7 @@ impl std::fmt::Debug for PackageVersionPolicy { } } +#[derive(Clone)] struct VersionPolicyRule { name_matcher: Matcher, exact_versions: Vec, diff --git a/pacquet/crates/package-manager/Cargo.toml b/pacquet/crates/package-manager/Cargo.toml index ce3740b07c..7f85cd20db 100644 --- a/pacquet/crates/package-manager/Cargo.toml +++ b/pacquet/crates/package-manager/Cargo.toml @@ -28,6 +28,7 @@ pacquet-patching = { workspace = true } pacquet-real-hoist = { workspace = true } pacquet-registry = { workspace = true } pacquet-reporter = { workspace = true } +pacquet-resolving-deps-resolver = { workspace = true } pacquet-resolving-npm-resolver = { workspace = true } pacquet-resolving-resolver-base = { workspace = true } pacquet-store-dir = { workspace = true } @@ -36,6 +37,7 @@ pacquet-workspace = { workspace = true } pacquet-workspace-state = { workspace = true } async-recursion = { workspace = true } +chrono = { workspace = true } dashmap = { workspace = true } derive_more = { workspace = true } futures-util = { workspace = true } diff --git a/pacquet/crates/package-manager/src/install.rs b/pacquet/crates/package-manager/src/install.rs index de4606b38a..43ecccafb3 100644 --- a/pacquet/crates/package-manager/src/install.rs +++ b/pacquet/crates/package-manager/src/install.rs @@ -513,6 +513,7 @@ where tarball_mem_cache, resolved_packages, http_client, + http_client_arc: Arc::clone(&http_client_arc), config, manifest, dependency_groups, diff --git a/pacquet/crates/package-manager/src/install_package_from_registry.rs b/pacquet/crates/package-manager/src/install_package_from_registry.rs index f3426f88a2..aab4234cb2 100644 --- a/pacquet/crates/package-manager/src/install_package_from_registry.rs +++ b/pacquet/crates/package-manager/src/install_package_from_registry.rs @@ -5,31 +5,30 @@ use crate::{ use derive_more::{Display, Error}; use miette::Diagnostic; use pacquet_config::Config; +use pacquet_lockfile::LockfileResolution; use pacquet_network::ThrottledClient; -use pacquet_package_manifest::PackageManifest; -use pacquet_registry::{Package, PackageTag, PackageVersion, RegistryError}; use pacquet_reporter::{LogEvent, LogLevel, ProgressLog, ProgressMessage, Reporter}; +use pacquet_resolving_resolver_base::ResolveResult; use pacquet_store_dir::{SharedReadonlyStoreIndex, SharedVerifiedFilesCache, StoreIndexWriter}; use pacquet_tarball::{DownloadTarballToStore, MemCache, TarballError}; +use serde_json::Value; +use ssri::Integrity; use std::{ path::Path, sync::{Arc, atomic::AtomicU8}, }; -/// This subroutine executes the following and returns the package -/// * Retrieves the package from the registry -/// * Extracts the tarball to global store directory (~/Library/../pacquet) -/// * Links global store directory to virtual dir (node_modules/.pacquet/..) +/// Materialize one pre-resolved package on disk: /// -/// `name` is the manifest dependency key — the directory name the -/// package will be exposed as inside `node_modules`. For an npm-alias -/// entry (`"foo": "npm:bar@^1.0.0"`), `name` is the local alias (`foo`) -/// and the actual registry package name (`bar`) is parsed out of -/// `version_range` before the registry lookup. +/// * Downloads the tarball into the global store directory. +/// * Imports (reflinks, hardlinks, or copies) the unpacked files into +/// `//node_modules//`. +/// * Symlinks `/` to the virtual-store +/// directory. /// -/// `symlink_path` will be appended by `name`. Therefore, it should be -/// resolved into the node_modules folder of a subdependency such as -/// `node_modules/.pacquet/fastify@1.0.0/node_modules`. +/// `alias` is the local install name in `node_modules`: the manifest +/// key. For an npm-alias entry (`"foo": "npm:bar@^1"`) it's the alias +/// (`foo`); the registry-side name is read from [`ResolveResult::id`]. #[must_use] pub struct InstallPackageFromRegistry<'a> { pub tarball_mem_cache: &'a MemCache, @@ -49,68 +48,45 @@ pub struct InstallPackageFromRegistry<'a> { /// [`pacquet_reporter::StageLog`]. pub requester: &'a str, pub node_modules_dir: &'a Path, - pub name: &'a str, - pub version_range: &'a str, + /// Local install name in `node_modules/`. + pub alias: &'a str, + /// Pre-resolved package returned by the resolver chain. + pub resolution: &'a ResolveResult, + /// `true` when this is the first edge encountered for this + /// `(name, version)` slot. Gates the per-package work: the tarball + /// download, the virtual-store import, and the + /// `pnpm:progress resolved` / `pnpm:progress imported` emits all + /// fire on the first visit. Subsequent visitors only refresh the + /// per-parent symlink under `node_modules_dir/`, mirroring + /// upstream's per-package (not per-edge) progress signalling at + /// . + pub first_visit: bool, } /// Error type of [`InstallPackageFromRegistry`]. #[derive(Debug, Display, Error, Diagnostic)] pub enum InstallPackageFromRegistryError { - FetchFromRegistry(#[error(source)] RegistryError), DownloadTarballToStore(#[error(source)] TarballError), ImportIndexedDir(#[error(source)] ImportIndexedDirError), SymlinkPackage(#[error(source)] SymlinkPackageError), + + /// The resolver produced a resolution shape the npm install path + /// can't materialize (today: anything other than a tarball + /// resolution carrying an integrity hash). Surfaces with a + /// pacquet-internal code; the matching pnpm error is upstream's + /// generic install failure for the same shape. + #[display("Unsupported resolution shape for npm install path: {detail}")] + #[diagnostic(code(pacquet_package_manager::unsupported_resolution))] + UnsupportedResolution { + #[error(not(source))] + detail: String, + }, } impl<'a> InstallPackageFromRegistry<'a> { /// Execute the subroutine. pub async fn run( self, - ) -> Result { - let &InstallPackageFromRegistry { http_client, config, name, version_range, .. } = &self; - - // Strip any `npm:@` alias prefix before talking to - // the registry. `name` (the manifest key) stays as the directory - // name inside `node_modules`. Unversioned aliases (`npm:foo`) are - // resolved to `"latest"` by `resolve_registry_dependency`. - let (registry_name, version_range) = - PackageManifest::resolve_registry_dependency(name, version_range); - - // Try parsing as a `PackageTag` first: this covers both the - // `"latest"` tag (including unversioned `npm:` aliases) and - // pinned versions like `"1.0.0"`. Semver ranges like `"^1.0.0"` - // fail `PackageTag::from_str` and fall through to the range - // resolution branch below. - Ok(if let Ok(tag) = version_range.parse::() { - let package_version = PackageVersion::fetch_from_registry( - registry_name, - tag, - http_client, - &config.registry, - &config.auth_headers, - ) - .await - .map_err(InstallPackageFromRegistryError::FetchFromRegistry)?; - self.install_package_version::(&package_version).await?; - package_version - } else { - let package = Package::fetch_from_registry( - registry_name, - http_client, - &config.registry, - &config.auth_headers, - ) - .await - .map_err(InstallPackageFromRegistryError::FetchFromRegistry)?; - let package_version = package.pinned_version(version_range).unwrap(); // TODO: propagate error for when no version satisfies range - self.install_package_version::(package_version).await?; - package_version.clone() - }) - } - - async fn install_package_version( - self, - package_version: &PackageVersion, ) -> Result<(), InstallPackageFromRegistryError> { let InstallPackageFromRegistry { tarball_mem_cache, @@ -122,99 +98,145 @@ impl<'a> InstallPackageFromRegistry<'a> { logged_methods, requester, node_modules_dir, - name, - .. + alias, + resolution, + first_visit, } = self; - let store_folder_name = package_version.to_virtual_store_name(); - let package_id = format!("{0}@{1}", package_version.name, package_version.version); - - // `pnpm:progress resolved` mirrors pnpm's emit at - // : - // one event per package once the resolver has picked a - // version. In pacquet's no-lockfile path that's the - // registry-fetched `package_version`; emit before the - // tarball download so consumers see resolved → fetched/ - // found_in_store → imported in order. - Reporter::emit(&LogEvent::Progress(ProgressLog { - level: LogLevel::Debug, - message: ProgressMessage::Resolved { - package_id: package_id.clone(), - requester: requester.to_owned(), - }, - })); - - // TODO: skip when it already exists in store? - let cas_paths = DownloadTarballToStore { - http_client, - store_dir: &config.store_dir, - store_index: store_index.cloned(), - store_index_writer: store_index_writer.cloned(), - verify_store_integrity: config.verify_store_integrity, - verified_files_cache: SharedVerifiedFilesCache::clone(verified_files_cache), - package_integrity: package_version - .dist - .integrity - .as_ref() - .expect("has integrity field"), - package_unpacked_size: package_version.dist.unpacked_size, - package_url: package_version.as_tarball_url(), - package_id: &package_id, - requester, - prefetched_cas_paths: None, - retry_opts: retry_opts_from_config(config), - auth_headers: &config.auth_headers, - ignore_file_pattern: None, - offline: config.offline, - } - .run_with_mem_cache::(tarball_mem_cache) - .await - .map_err(InstallPackageFromRegistryError::DownloadTarballToStore)?; + let real_name = resolution.id.name.to_string(); + let version = resolution.id.suffix.to_string(); + let virtual_store_name = format!("{}@{}", real_name.replace('/', "+"), version); + let package_id = format!("{real_name}@{version}"); // The virtual store always uses the registry-returned name - // (`package_version.name`), so npm-alias entries share a single - // virtual store directory with their non-aliased counterparts. - // The exposed symlink under `node_modules/` uses the manifest - // key (`name`) so both forms can coexist in the same parent. + // so npm-alias entries share a single virtual store directory + // with their non-aliased counterparts. The exposed symlink + // under `node_modules/` uses the manifest key (`alias`) so + // both forms can coexist in the same parent. let save_path = config .virtual_store_dir - .join(store_folder_name) + .join(&virtual_store_name) .join("node_modules") - .join(&package_version.name); + .join(&real_name); - let symlink_path = node_modules_dir.join(name); + let symlink_path = node_modules_dir.join(alias); - tracing::info!(target: "pacquet::import", ?save_path, ?symlink_path, "Import package"); + if first_visit { + let (tarball_url, integrity) = extract_tarball(&resolution.resolution)?; + let unpacked_size = manifest_unpacked_size(resolution.manifest.as_ref()); - import_indexed_dir::( - logged_methods, - config.package_import_method, - &save_path, - &cas_paths, - ImportIndexedDirOpts::default(), - ) - .map_err(InstallPackageFromRegistryError::ImportIndexedDir)?; + // `pnpm:progress resolved` mirrors pnpm's emit at + // : + // one event per package once the resolver has picked a + // version. Emit before the tarball download so consumers + // see resolved → fetched/found_in_store → imported in + // order. + Reporter::emit(&LogEvent::Progress(ProgressLog { + level: LogLevel::Debug, + message: ProgressMessage::Resolved { + package_id: package_id.clone(), + requester: requester.to_owned(), + }, + })); + // TODO: skip when it already exists in store? + let cas_paths = DownloadTarballToStore { + http_client, + store_dir: &config.store_dir, + store_index: store_index.cloned(), + store_index_writer: store_index_writer.cloned(), + verify_store_integrity: config.verify_store_integrity, + verified_files_cache: SharedVerifiedFilesCache::clone(verified_files_cache), + package_integrity: &integrity, + package_unpacked_size: unpacked_size, + package_url: tarball_url, + package_id: &package_id, + requester, + prefetched_cas_paths: None, + retry_opts: retry_opts_from_config(config), + auth_headers: &config.auth_headers, + ignore_file_pattern: None, + offline: config.offline, + } + .run_with_mem_cache::(tarball_mem_cache) + .await + .map_err(InstallPackageFromRegistryError::DownloadTarballToStore)?; + + tracing::info!(target: "pacquet::import", ?save_path, ?symlink_path, "Import package"); + + import_indexed_dir::( + logged_methods, + config.package_import_method, + &save_path, + &cas_paths, + ImportIndexedDirOpts::default(), + ) + .map_err(InstallPackageFromRegistryError::ImportIndexedDir)?; + + // `pnpm:progress imported` — see the matching emit in + // `create_virtual_dir_by_snapshot::run` for the rationale + // on the optimistic `method` value. `to` is the per- + // package virtual-store directory the symlink under + // `node_modules/{alias}` resolves to. + Reporter::emit(&LogEvent::Progress(ProgressLog { + level: LogLevel::Debug, + message: ProgressMessage::Imported { + method: crate::optimistic_wire_method(config.package_import_method), + requester: requester.to_owned(), + to: save_path.to_string_lossy().into_owned(), + }, + })); + } + + // The per-parent symlink is the only step that runs on every + // visit. Mirrors pnpm: one `pnpm:progress` sequence per + // package, plus one symlink per direct edge. symlink_package(&save_path, &symlink_path) .map_err(InstallPackageFromRegistryError::SymlinkPackage)?; - // `pnpm:progress imported` — see the matching emit in - // `create_virtual_dir_by_snapshot::run` for the rationale on - // the optimistic `method` value. `to` is the per-package - // virtual-store directory the symlink under - // `node_modules/{name}` resolves to. - Reporter::emit(&LogEvent::Progress(ProgressLog { - level: LogLevel::Debug, - message: ProgressMessage::Imported { - method: crate::optimistic_wire_method(config.package_import_method), - requester: requester.to_owned(), - to: save_path.to_string_lossy().into_owned(), - }, - })); - Ok(()) } } +/// Pull the tarball URL + integrity hash out of the resolver-produced +/// resolution. Refuses any shape the npm install path can't fetch. +fn extract_tarball( + resolution: &LockfileResolution, +) -> Result<(&str, Integrity), InstallPackageFromRegistryError> { + match resolution { + LockfileResolution::Tarball(t) => { + let integrity = t.integrity.clone().ok_or_else(|| { + InstallPackageFromRegistryError::UnsupportedResolution { + detail: "tarball resolution missing integrity hash".to_string(), + } + })?; + Ok((t.tarball.as_str(), integrity)) + } + LockfileResolution::Registry(_) + | LockfileResolution::Directory(_) + | LockfileResolution::Git(_) + | LockfileResolution::Binary(_) + | LockfileResolution::Variations(_) => { + Err(InstallPackageFromRegistryError::UnsupportedResolution { + detail: format!("{resolution:?}"), + }) + } + } +} + +/// Read `dist.unpackedSize` off the resolver-fetched manifest. Returns +/// `None` when missing or non-numeric — the tarball extractor treats it +/// as a hint, not a hard requirement. +fn manifest_unpacked_size(manifest: Option<&Value>) -> Option { + // `usize::try_from` so a `u64` value larger than the host's + // `usize` (32-bit targets) degrades to "no hint" rather than + // truncating silently and producing an undersized pre-allocation. + manifest? + .get("dist")? + .get("unpackedSize")? + .as_u64() + .and_then(|value| usize::try_from(value).ok()) +} + #[cfg(test)] mod tests; diff --git a/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs b/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs index 3aa9095970..dabd752c13 100644 --- a/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs +++ b/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs @@ -1,15 +1,17 @@ use super::InstallPackageFromRegistry; -use node_semver::Version; use pacquet_config::Config; use pacquet_network::ThrottledClient; use pacquet_registry_mock::AutoMockInstance; use pacquet_reporter::{LogEvent, ProgressMessage, Reporter, SilentReporter}; +use pacquet_resolving_npm_resolver::{InMemoryPackageMetaCache, NpmResolver}; +use pacquet_resolving_resolver_base::{ResolveOptions, Resolver, WantedDependency}; use pacquet_store_dir::{SharedVerifiedFilesCache, StoreDir}; use pipe_trait::Pipe; use pretty_assertions::assert_eq; use std::{ + collections::HashMap, path::Path, - sync::{Mutex, atomic::AtomicU8}, + sync::{Arc, Mutex, atomic::AtomicU8}, }; use tempfile::tempdir; @@ -74,19 +76,64 @@ fn create_config(store_dir: &Path, modules_dir: &Path, virtual_store_dir: &Path) } } +async fn resolve_via_mock( + registry: &str, + cache_dir: &Path, + http_client: Arc, + alias: &str, + range: &str, +) -> pacquet_resolving_resolver_base::ResolveResult { + let mut registries = HashMap::new(); + registries.insert("default".to_string(), registry.to_string()); + let resolver = NpmResolver { + registries, + named_registries: HashMap::new(), + http_client, + auth_headers: Default::default(), + meta_cache: Arc::new(InMemoryPackageMetaCache::default()), + cache_dir: Some(cache_dir.to_path_buf()), + offline: false, + prefer_offline: false, + ignore_missing_time_field: true, + }; + let wanted = WantedDependency { + alias: Some(alias.to_string()), + bare_specifier: Some(range.to_string()), + ..WantedDependency::default() + }; + resolver + .resolve(&wanted, &ResolveOptions::default()) + .await + .expect("resolve succeeds against the mock registry") + .expect("resolver claims the dep") +} + #[tokio::test] -pub async fn should_find_package_version_from_registry() { +pub async fn should_install_package_from_pre_resolved_result() { + let mock_instance = AutoMockInstance::load_or_init(); let store_dir = tempdir().unwrap(); let modules_dir = tempdir().unwrap(); let virtual_store_dir = tempdir().unwrap(); - let config: &'static Config = - create_config(store_dir.path(), modules_dir.path(), virtual_store_dir.path()) - .pipe(Box::new) - .pipe(Box::leak); - let http_client = ThrottledClient::new_for_installs(); + let cache_dir = tempdir().unwrap(); + + let mut config = create_config(store_dir.path(), modules_dir.path(), virtual_store_dir.path()); + config.registry = mock_instance.url(); + let config: &'static Config = config.pipe(Box::new).pipe(Box::leak); + + let http_client = Arc::new(ThrottledClient::new_for_installs()); let verified_files_cache = SharedVerifiedFilesCache::default(); let logged_methods = AtomicU8::new(0); - let package = InstallPackageFromRegistry { + + let resolution = resolve_via_mock( + &config.registry, + cache_dir.path(), + Arc::clone(&http_client), + "@pnpm.e2e/hello-world-js-bin", + "1.0.0", + ) + .await; + + InstallPackageFromRegistry { tarball_mem_cache: &Default::default(), config, http_client: &http_client, @@ -95,30 +142,19 @@ pub async fn should_find_package_version_from_registry() { verified_files_cache: &verified_files_cache, logged_methods: &logged_methods, requester: "", - name: "fast-querystring", - version_range: "1.0.0", + alias: "@pnpm.e2e/hello-world-js-bin", + resolution: &resolution, node_modules_dir: modules_dir.path(), + first_visit: true, } .run::() .await .unwrap(); - assert_eq!(package.name, "fast-querystring"); - assert_eq!( - package.version, - Version { major: 1, minor: 0, patch: 0, build: vec![], pre_release: vec![] }, - ); - - let virtual_store_path = virtual_store_dir - .path() - .join(package.to_virtual_store_name()) - .join("node_modules") - .join(&package.name); - eprintln!( - "virtual_store_path={virtual_store_path:?} exists={} is_dir={}", - virtual_store_path.exists(), - virtual_store_path.is_dir(), - ); + let real_name = resolution.id.name.to_string(); + let virtual_store_name = format!("{}@{}", real_name.replace('/', "+"), resolution.id.suffix); + let virtual_store_path = + virtual_store_dir.path().join(virtual_store_name).join("node_modules").join(&real_name); assert!(virtual_store_path.is_dir()); // Make sure the symlink resolves to the correct path. pacquet @@ -126,28 +162,130 @@ pub async fn should_find_package_version_from_registry() { // (matching upstream `symlink-dir`), so canonicalize via the // link itself rather than comparing `read_link` output against // the absolute store path. - let symlink_path = modules_dir.path().join(&package.name); + let symlink_path = modules_dir.path().join("@pnpm.e2e/hello-world-js-bin"); assert_eq!( dunce::canonicalize(&symlink_path).expect("canonicalize symlink"), dunce::canonicalize(&virtual_store_path).expect("canonicalize virtual store path"), ); + + drop((store_dir, modules_dir, virtual_store_dir, cache_dir, mock_instance)); } -/// `InstallPackageFromRegistry::run` (the no-lockfile path) emits -/// the `pnpm:progress` per-package sequence: `resolved` before the -/// tarball download, then `fetched` (or `found_in_store` on a cache -/// hit) from inside `DownloadTarballToStore`, then `imported` after -/// `create_cas_files` returns Ok. Pin the order with a recording -/// reporter — a regression in either the sequence or the -/// `package_id`/`requester` payload would currently slip through -/// since the tarball-side and frozen-lockfile-side tests don't -/// exercise this code path. -/// -/// Uses `AutoMockInstance` (the workspace's local mock registry) so -/// the test isn't network-dependent — same pattern as -/// `install::tests::should_install_dependencies`. +/// Second-edge install for the same `(name, version)` must NOT emit +/// `pnpm:progress resolved` or `pnpm:progress imported` — those are +/// per-package signals upstream, not per-edge. The second visitor +/// only refreshes the per-parent symlink. Pin the contract here so a +/// future refactor that moves the gate can't quietly reintroduce +/// per-edge spam. #[tokio::test] -async fn no_lockfile_install_emits_progress_sequence() { +async fn second_visit_skips_progress_emits_but_still_links() { + static EVENTS: Mutex> = Mutex::new(Vec::new()); + EVENTS.lock().unwrap().clear(); + + struct RecordingReporter; + impl Reporter for RecordingReporter { + fn emit(event: &LogEvent) { + EVENTS.lock().unwrap().push(event.clone()); + } + } + + let mock_instance = AutoMockInstance::load_or_init(); + let store_dir = tempdir().unwrap(); + let modules_dir = tempdir().unwrap(); + let second_parent_dir = tempdir().unwrap(); + let virtual_store_dir = tempdir().unwrap(); + let cache_dir = tempdir().unwrap(); + + let mut config = create_config(store_dir.path(), modules_dir.path(), virtual_store_dir.path()); + config.registry = mock_instance.url(); + let config: &'static Config = config.pipe(Box::new).pipe(Box::leak); + + let http_client = Arc::new(ThrottledClient::new_for_installs()); + let verified_files_cache = SharedVerifiedFilesCache::default(); + let logged_methods = AtomicU8::new(0); + + let resolution = resolve_via_mock( + &config.registry, + cache_dir.path(), + Arc::clone(&http_client), + "@pnpm.e2e/hello-world-js-bin", + "1.0.0", + ) + .await; + + // First edge: full path. Run, then clear events for the assertion + // on the second edge. + InstallPackageFromRegistry { + tarball_mem_cache: &Default::default(), + config, + http_client: &http_client, + store_index: None, + store_index_writer: None, + verified_files_cache: &verified_files_cache, + logged_methods: &logged_methods, + requester: "/proj", + alias: "first-alias", + resolution: &resolution, + node_modules_dir: modules_dir.path(), + first_visit: true, + } + .run::() + .await + .expect("first visit installs cleanly"); + EVENTS.lock().unwrap().clear(); + + // Second edge: same `(name, version)`, different parent dir. + InstallPackageFromRegistry { + tarball_mem_cache: &Default::default(), + config, + http_client: &http_client, + store_index: None, + store_index_writer: None, + verified_files_cache: &verified_files_cache, + logged_methods: &logged_methods, + requester: "/proj", + alias: "second-alias", + resolution: &resolution, + node_modules_dir: second_parent_dir.path(), + first_visit: false, + } + .run::() + .await + .expect("second visit symlinks cleanly"); + + let kinds: Vec<&'static str> = EVENTS + .lock() + .unwrap() + .iter() + .filter_map(|event| match event { + LogEvent::Progress(log) => Some(match &log.message { + ProgressMessage::Resolved { .. } => "resolved", + ProgressMessage::Fetched { .. } => "fetched", + ProgressMessage::FoundInStore { .. } => "found_in_store", + ProgressMessage::Imported { .. } => "imported", + }), + _ => None, + }) + .collect(); + assert!(kinds.is_empty(), "second visit must not emit progress events, got {kinds:?}"); + + // The second-parent symlink must exist after the call. + let symlink_path = second_parent_dir.path().join("second-alias"); + assert!(symlink_path.exists() || symlink_path.is_symlink(), "per-parent symlink missing"); + + drop((store_dir, modules_dir, second_parent_dir, virtual_store_dir, cache_dir, mock_instance)); +} + +/// `InstallPackageFromRegistry::run` emits the `pnpm:progress` per- +/// package sequence: `resolved` before the tarball download, then +/// `fetched` (or `found_in_store` on a cache hit) from inside +/// `DownloadTarballToStore`, then `imported` after `create_cas_files` +/// returns Ok. Pin the order with a recording reporter — a regression +/// in either the sequence or the `package_id`/`requester` payload +/// would currently slip through since the tarball-side and +/// frozen-lockfile-side tests don't exercise this code path. +#[tokio::test] +async fn install_emits_progress_sequence() { static EVENTS: Mutex> = Mutex::new(Vec::new()); EVENTS.lock().unwrap().clear(); @@ -163,16 +301,26 @@ async fn no_lockfile_install_emits_progress_sequence() { let store_dir = tempdir().unwrap(); let modules_dir = tempdir().unwrap(); let virtual_store_dir = tempdir().unwrap(); + let cache_dir = tempdir().unwrap(); let mut config = create_config(store_dir.path(), modules_dir.path(), virtual_store_dir.path()); config.registry = mock_instance.url(); let config: &'static Config = config.pipe(Box::new).pipe(Box::leak); - let http_client = ThrottledClient::new_for_installs(); + let http_client = Arc::new(ThrottledClient::new_for_installs()); let verified_files_cache = SharedVerifiedFilesCache::default(); let logged_methods = AtomicU8::new(0); - let _package = InstallPackageFromRegistry { + let resolution = resolve_via_mock( + &config.registry, + cache_dir.path(), + Arc::clone(&http_client), + "@pnpm.e2e/hello-world-js-bin", + "1.0.0", + ) + .await; + + InstallPackageFromRegistry { tarball_mem_cache: &Default::default(), config, http_client: &http_client, @@ -181,9 +329,10 @@ async fn no_lockfile_install_emits_progress_sequence() { verified_files_cache: &verified_files_cache, logged_methods: &logged_methods, requester: "/proj", - name: "@pnpm.e2e/hello-world-js-bin", - version_range: "1.0.0", + alias: "@pnpm.e2e/hello-world-js-bin", + resolution: &resolution, node_modules_dir: modules_dir.path(), + first_visit: true, } .run::() .await @@ -230,5 +379,5 @@ async fn no_lockfile_install_emits_progress_sequence() { other => panic!("first event must be Resolved; got {other:?}"), } - drop((store_dir, modules_dir, virtual_store_dir, mock_instance)); + drop((store_dir, modules_dir, virtual_store_dir, cache_dir, mock_instance)); } diff --git a/pacquet/crates/package-manager/src/install_without_lockfile.rs b/pacquet/crates/package-manager/src/install_without_lockfile.rs index e352b33147..cb83e5884a 100644 --- a/pacquet/crates/package-manager/src/install_without_lockfile.rs +++ b/pacquet/crates/package-manager/src/install_without_lockfile.rs @@ -3,7 +3,7 @@ use crate::{ LinkVirtualStoreBins, LinkVirtualStoreBinsError, store_init::init_store_dir_best_effort, }; use async_recursion::async_recursion; -use dashmap::DashSet; +use dashmap::{DashMap, mapref::entry::Entry}; use derive_more::{Display, Error}; use futures_util::future; use miette::Diagnostic; @@ -11,34 +11,58 @@ use pacquet_cmd_shim::{Host, LinkBinsError, link_bins}; use pacquet_config::Config; use pacquet_network::ThrottledClient; use pacquet_package_manifest::{DependencyGroup, PackageManifest}; -use pacquet_registry::PackageVersion; use pacquet_reporter::{LogEvent, LogLevel, Reporter, Stage, StageLog}; +use pacquet_resolving_deps_resolver::{ + DirectDep, ResolveDependencyTreeError, ResolveDependencyTreeOptions, ResolvedTree, + resolve_dependency_tree, +}; +use pacquet_resolving_npm_resolver::{InMemoryPackageMetaCache, NpmResolver}; +use pacquet_resolving_resolver_base::ResolveOptions; use pacquet_store_dir::{SharedVerifiedFilesCache, StoreIndex, StoreIndexWriter}; use pacquet_tarball::MemCache; use pipe_trait::Pipe; -use std::collections::BTreeMap; -use std::sync::atomic::AtomicU8; +use std::{ + collections::{BTreeMap, HashMap}, + path::Path, + sync::{Arc, atomic::AtomicU8}, +}; +use tokio::sync::watch; -/// In-memory cache for packages that have started resolving dependencies. +/// In-memory dedup gate for packages materialized during this install. +/// Keyed by virtual-store name (`{name-with-slashes-replaced}@{version}`). /// -/// The contents of set is the package's virtual_store_name. -/// e.g. `@pnpm.e2e/dep-1@1.0.0` → `@pnpm.e2e+dep-1@1.0.0` -pub type ResolvedPackages = DashSet; +/// The value is a [`watch::Sender`] whose state transitions from +/// `false` (slot reserved, first writer running) to `true` (the first +/// writer's materialization is complete, save_path is on disk). +/// Second visitors subscribe to the sender before issuing their +/// per-parent symlink so they don't race ahead of the first writer's +/// `import_indexed_dir` — critical on Windows where `symlink_package` +/// may fall back to a junction, which requires the target directory +/// to exist at creation time. Mirrors the implicit "wait until the +/// shared slot is on disk" sequencing pnpm gets from running one +/// resolveDependencyTree pass before the install pass. +pub type ResolvedPackages = DashMap>; /// This subroutine install packages from a `package.json` without reading or writing a lockfile. /// /// **Brief overview for each package:** -/// * Fetch a tarball of the package. -/// * Extract the tarball into the store directory. -/// * Import (by reflink, hardlink, or copy) the files from the store dir to `node_modules/.pacquet/{name}@{version}/node_modules/{name}/`. -/// * Create dependency symbolic links in `node_modules/.pacquet/{name}@{version}/node_modules/`. +/// * Resolve the dependency through the [`NpmResolver`] chain +/// ([`resolve_dependency_tree`] builds the full tree first). +/// * Fetch a tarball of each resolved package and extract it into the +/// store directory. +/// * Import (by reflink, hardlink, or copy) the files from the store +/// dir to `node_modules/.pacquet/{name}@{version}/node_modules/{name}/`. +/// * Create dependency symbolic links in +/// `node_modules/.pacquet/{name}@{version}/node_modules/`. /// * Create a symbolic link at `node_modules/{name}`. -/// * Repeat the process for the dependencies of the package. #[must_use] pub struct InstallWithoutLockfile<'a, DependencyGroupList> { pub tarball_mem_cache: &'a MemCache, pub resolved_packages: &'a ResolvedPackages, pub http_client: &'a ThrottledClient, + /// Same client behind an [`Arc`] for the [`NpmResolver`], whose + /// stored `ThrottledClient` outlives any per-call borrow. + pub http_client_arc: Arc, pub config: &'static Config, pub manifest: &'a PackageManifest, pub dependency_groups: DependencyGroupList, @@ -60,6 +84,34 @@ pub enum InstallWithoutLockfileError { #[diagnostic(transparent)] LinkVirtualStoreBins(#[error(source)] LinkVirtualStoreBinsError), + + /// The resolver chain failed for at least one dependency. Mirrors + /// upstream's per-dep resolver error surface — the inner message + /// carries the boxed error's `Display`. + #[display("Failed to resolve dependency tree: {_0}")] + #[diagnostic(code(pacquet_package_manager::resolve_dependency_tree))] + ResolveDependencyTree(#[error(not(source))] ResolveDependencyTreeError), + + /// `minimumReleaseAgeExclude` patterns rejected at compile time. + /// Mirrors upstream's `ERR_PNPM_INVALID_MINIMUM_RELEASE_AGE_EXCLUDE`. + #[display("Invalid value in minimumReleaseAgeExclude: {_0}")] + #[diagnostic(code(ERR_PNPM_INVALID_MINIMUM_RELEASE_AGE_EXCLUDE))] + MinimumReleaseAgeExclude(#[error(source)] pacquet_config::version_policy::VersionPolicyError), + + /// The first writer of a shared `(name, version)` slot dropped its + /// completion signal without sending `true`. In practice this only + /// fires when the first writer's task panicked / was cancelled + /// mid-import; a second visitor that was waiting on the slot can't + /// safely create its per-parent symlink (the virtual-store target + /// directory may not exist), so the install fails closed. + #[display( + "First writer for virtual-store slot {virtual_store_name} dropped before signalling completion" + )] + #[diagnostic(code(pacquet_package_manager::first_writer_aborted))] + FirstWriterAborted { + #[error(not(source))] + virtual_store_name: String, + }, } impl<'a, DependencyGroupList> InstallWithoutLockfile<'a, DependencyGroupList> { @@ -82,6 +134,7 @@ impl<'a, DependencyGroupList> InstallWithoutLockfile<'a, DependencyGroupList> { let InstallWithoutLockfile { tarball_mem_cache, http_client, + http_client_arc, config, manifest, dependency_groups, @@ -99,6 +152,67 @@ impl<'a, DependencyGroupList> InstallWithoutLockfile<'a, DependencyGroupList> { // policy shared with `create_virtual_store.rs`. init_store_dir_best_effort(store_dir).await; + // Resolve pass: walk the manifest's dependencies through the + // npm resolver chain and produce a flat tree keyed by + // `name@version`. The meta cache is owned for the duration of + // this call so every per-package resolve reuses a single + // packument per `(registry, name)` pair, then dropped before + // the install pass begins. + let mut registries = HashMap::new(); + registries.insert("default".to_string(), config.registry.clone()); + let npm_resolver = NpmResolver { + registries, + named_registries: HashMap::new(), + http_client: Arc::clone(&http_client_arc), + auth_headers: Arc::clone(&config.auth_headers), + meta_cache: Arc::new(InMemoryPackageMetaCache::default()), + cache_dir: Some(config.cache_dir.clone()), + offline: config.offline, + prefer_offline: config.prefer_offline, + ignore_missing_time_field: config.minimum_release_age_ignore_missing_time, + }; + + // Compile `minimumReleaseAge` (and its exclude pattern set) + // for the resolve pass. Mirrors the verifier wiring in + // `build_resolution_verifiers` so the resolver-time pick and + // the lockfile-verification check enforce the same policy. + // + // Every step uses checked arithmetic so an absurd configured + // value (e.g. `u64::MAX`) can't wrap on the `u64 → i64` cast, + // overflow inside `chrono::Duration`, or underflow the + // wall-clock subtraction. On overflow we leave the policy + // inactive for this install — better than silently producing + // a cutoff in the wrong direction. + let published_by = config.minimum_release_age.and_then(|minutes| { + let duration = chrono::Duration::try_minutes(i64::try_from(minutes).ok()?)?; + chrono::Utc::now().checked_sub_signed(duration) + }); + let published_by_exclude = config + .minimum_release_age_exclude + .as_deref() + .filter(|patterns| !patterns.is_empty()) + .map(pacquet_config::version_policy::create_package_version_policy) + .transpose() + .map_err(InstallWithoutLockfileError::MinimumReleaseAgeExclude)?; + + let tree_opts = ResolveDependencyTreeOptions { + auto_install_peers: config.auto_install_peers, + base_opts: ResolveOptions { + default_tag: Some("latest".to_string()), + published_by, + published_by_exclude, + ..ResolveOptions::default() + }, + }; + + let tree = resolve_dependency_tree(&npm_resolver, manifest, dependency_groups, tree_opts) + .await + .map_err(InstallWithoutLockfileError::ResolveDependencyTree)?; + + // Drop the resolver (and its meta cache) before the install + // pass: the tree captures every `ResolveResult` we need. + drop(npm_resolver); + // Open the read-only SQLite index once per install, shared across // every `DownloadTarballToStore`. See the matching comment in // `create_virtual_store.rs` for the full rationale, including the @@ -133,53 +247,22 @@ impl<'a, DependencyGroupList> InstallWithoutLockfile<'a, DependencyGroupList> { // for any later package referencing the same blob. let verified_files_cache = SharedVerifiedFilesCache::default(); - manifest - .dependencies(dependency_groups) - .map(|(name, version_range)| { - // Same pattern as `create_virtual_store.rs`: clone the - // shared cache handle so each per-dependency future owns - // a handle it can move into the `async move` block and - // then reference from within the future. - let verified_files_cache = SharedVerifiedFilesCache::clone(&verified_files_cache); - async move { - let dependency = InstallPackageFromRegistry { - tarball_mem_cache, - http_client, - config, - store_index: store_index_ref, - store_index_writer: store_index_writer_ref, - verified_files_cache: &verified_files_cache, - logged_methods, - requester, - node_modules_dir: &config.modules_dir, - name, - version_range, - } - .run::() - .await - .map_err(InstallWithoutLockfileError::InstallPackageFromRegistry)?; + let install_ctx = InstallCtx { + tarball_mem_cache, + http_client, + config, + tree: &tree, + store_index: store_index_ref, + store_index_writer: store_index_writer_ref, + verified_files_cache: &verified_files_cache, + logged_methods, + resolved_packages, + requester, + }; - InstallWithoutLockfile { - tarball_mem_cache, - http_client, - config, - manifest, - dependency_groups: (), - resolved_packages, - logged_methods, - requester, - } - .install_dependencies_from_registry::( - &dependency, - store_index_ref, - store_index_writer_ref, - &verified_files_cache, - ) - .await?; - - Ok::<_, InstallWithoutLockfileError>(()) - } - }) + tree.direct + .iter() + .map(|dep| install_subtree::(&install_ctx, dep, &config.modules_dir)) .pipe(future::try_join_all) .await?; @@ -255,77 +338,127 @@ impl<'a, DependencyGroupList> InstallWithoutLockfile<'a, DependencyGroupList> { } } -impl<'a> InstallWithoutLockfile<'a, ()> { - /// Install dependencies of a dependency. - #[async_recursion] - async fn install_dependencies_from_registry( - &self, - package: &PackageVersion, - store_index: Option<&'async_recursion pacquet_store_dir::SharedReadonlyStoreIndex>, - store_index_writer: Option< - &'async_recursion std::sync::Arc, - >, - verified_files_cache: &'async_recursion SharedVerifiedFilesCache, - ) -> Result<(), InstallWithoutLockfileError> - where - Reporter: self::Reporter, - { - let InstallWithoutLockfile { - tarball_mem_cache, - http_client, - config, - resolved_packages, - .. - } = self; - - // This package has already resolved, there is no need to reinstall again. - if !resolved_packages.insert(package.to_virtual_store_name()) { - tracing::info!(target: "pacquet::install", package = ?package.to_virtual_store_name(), "Skip subset"); - return Ok(()); - } - - let node_modules_path = self - .config - .virtual_store_dir - .join(package.to_virtual_store_name()) - .join("node_modules"); - - tracing::info!(target: "pacquet::install", node_modules = ?node_modules_path, "Start subset"); - - let node_modules_path_ref = &node_modules_path; - package - .dependencies(self.config.auto_install_peers) - .map(|(name, version_range)| async move { - let dependency = InstallPackageFromRegistry { - tarball_mem_cache, - http_client, - config, - store_index, - store_index_writer, - verified_files_cache, - logged_methods: self.logged_methods, - requester: self.requester, - node_modules_dir: node_modules_path_ref, - name, - version_range, - } - .run::() - .await - .map_err(InstallWithoutLockfileError::InstallPackageFromRegistry)?; - self.install_dependencies_from_registry::( - &dependency, - store_index, - store_index_writer, - verified_files_cache, - ) - .await?; - Ok::<_, InstallWithoutLockfileError>(()) - }) - .pipe(future::try_join_all) - .await?; - - tracing::info!(target: "pacquet::install", node_modules = ?node_modules_path, "Complete subset"); - - Ok(()) - } +/// Per-install state threaded into [`install_subtree`]. Holds every +/// shared handle the per-package installer needs plus a borrowed view +/// of the resolved tree the resolve pass produced. +struct InstallCtx<'a> { + tarball_mem_cache: &'a MemCache, + http_client: &'a ThrottledClient, + config: &'static Config, + tree: &'a ResolvedTree, + store_index: Option<&'a pacquet_store_dir::SharedReadonlyStoreIndex>, + store_index_writer: Option<&'a Arc>, + verified_files_cache: &'a SharedVerifiedFilesCache, + logged_methods: &'a AtomicU8, + resolved_packages: &'a ResolvedPackages, + requester: &'a str, +} + +/// Install the package referenced by `dep` plus its transitive +/// children. Recurses into each child's `node_modules/.pacquet// +/// node_modules/` so transitive symlinks land in their parent's slot. +#[async_recursion] +async fn install_subtree<'ctx, Reporter>( + ctx: &InstallCtx<'ctx>, + dep: &DirectDep, + node_modules_dir: &Path, +) -> Result<(), InstallWithoutLockfileError> +where + Reporter: self::Reporter, +{ + let package = ctx + .tree + .packages + .get(&dep.id) + .expect("resolve_dependency_tree must populate every referenced id"); + + let virtual_store_name = format!( + "{}@{}", + package.result.id.name.to_string().replace('/', "+"), + package.result.id.suffix, + ); + + // Claim the `(name, version)` slot. `first_visit` is true iff this + // task created the watch sender; later visitors get a receiver and + // await the first writer's completion before continuing — without + // that gate, a second visitor's `symlink_package` could land + // before the first writer's `import_indexed_dir` has created the + // target directory, which `force_symlink_dir`'s Windows junction + // fallback rejects (junctions require an existing target). + let (first_visit, completion_rx) = match ctx.resolved_packages.entry(virtual_store_name.clone()) + { + Entry::Vacant(slot) => { + let (tx, _initial_rx) = watch::channel(false); + slot.insert(tx); + (true, None) + } + Entry::Occupied(slot) => (false, Some(slot.get().subscribe())), + }; + + if let Some(mut rx) = completion_rx { + loop { + if *rx.borrow_and_update() { + break; + } + if rx.changed().await.is_err() { + return Err(InstallWithoutLockfileError::FirstWriterAborted { virtual_store_name }); + } + } + } + + InstallPackageFromRegistry { + tarball_mem_cache: ctx.tarball_mem_cache, + http_client: ctx.http_client, + config: ctx.config, + store_index: ctx.store_index, + store_index_writer: ctx.store_index_writer, + verified_files_cache: ctx.verified_files_cache, + logged_methods: ctx.logged_methods, + requester: ctx.requester, + node_modules_dir, + alias: &dep.alias, + resolution: &package.result, + first_visit, + } + .run::() + .await + .map_err(InstallWithoutLockfileError::InstallPackageFromRegistry)?; + + if first_visit { + // `send_replace` (not `send`) is critical here: `Sender::send` + // returns `Err` when the channel has zero receivers, leaving + // the sender's stored value unchanged. The initial receiver + // from `watch::channel` is dropped at the Vacant arm above to + // avoid keeping a live receiver in the map, so the channel + // really *does* have zero receivers when the first writer + // races ahead of every subscriber (common for cyclic and + // diamond graphs where the second visitor only enters the map + // after the first writer has already finished `IPFR::run`). + // Under `send`, that race left the stored value at `false` and + // any later subscriber's `borrow_and_update()` would see + // `false` and `changed().await` would block forever — the + // hang the cycle tests hit. `send_replace` always writes the + // value and returns the old one regardless of receiver count. + if let Some(slot) = ctx.resolved_packages.get(&virtual_store_name) { + slot.send_replace(true); + } + } else { + // Second visitor: the per-parent symlink is the only step + // that needed to run; the first writer is already walking + // this package's children. + tracing::info!(target: "pacquet::install", package = %virtual_store_name, "Skip subset"); + return Ok(()); + } + + let child_node_modules = + ctx.config.virtual_store_dir.join(&virtual_store_name).join("node_modules"); + + package + .children + .iter() + .map(|child| install_subtree::(ctx, child, &child_node_modules)) + .pipe(future::try_join_all) + .await?; + + Ok(()) } diff --git a/pacquet/crates/resolving-deps-resolver/Cargo.toml b/pacquet/crates/resolving-deps-resolver/Cargo.toml new file mode 100644 index 0000000000..4cc5dff4ea --- /dev/null +++ b/pacquet/crates/resolving-deps-resolver/Cargo.toml @@ -0,0 +1,34 @@ +[package] +name = "pacquet-resolving-deps-resolver" +version = "0.0.1" +publish = false +authors.workspace = true +description.workspace = true +edition.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +pacquet-package-manifest = { workspace = true } +pacquet-resolving-resolver-base = { workspace = true } + +async-recursion = { workspace = true } +derive_more = { workspace = true } +futures-util = { workspace = true } +miette = { workspace = true } +pipe-trait = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true, features = ["sync"] } + +[dev-dependencies] +pacquet-lockfile = { workspace = true } + +node-semver = { workspace = true } +pretty_assertions = { workspace = true } +tempfile = { workspace = true } +tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } + +[lints] +workspace = true diff --git a/pacquet/crates/resolving-deps-resolver/src/lib.rs b/pacquet/crates/resolving-deps-resolver/src/lib.rs new file mode 100644 index 0000000000..a424d9023a --- /dev/null +++ b/pacquet/crates/resolving-deps-resolver/src/lib.rs @@ -0,0 +1,35 @@ +//! Minimum-slice port of pnpm's +//! [`resolveDependencyTree`](https://github.com/pnpm/pnpm/blob/f657b5cb44/installing/deps-resolver/src/resolveDependencyTree.ts). +//! +//! Walks a project manifest's direct dependencies through a +//! [`Resolver`](pacquet_resolving_resolver_base::Resolver) chain, +//! then recurses on every resolved package's own manifest +//! dependencies, producing a flat package map and a tree of parent- +//! child edges. Pacquet's install layer uses this as the resolve +//! pass before the tarball-fetch + install pass. +//! +//! This is intentionally a thin slice of upstream: +//! +//! - **Single importer.** Pacquet's install doesn't expose workspaces +//! to the resolver yet; the entry point takes one manifest at a +//! time. The flat-map shape ports cleanly when multi-importer +//! lands. +//! - **No peers resolution.** Upstream's +//! [`resolveRootDependencies`](https://github.com/pnpm/pnpm/blob/f657b5cb44/installing/deps-resolver/src/resolveDependencies.ts#L327) +//! walks peer dependencies as a postponed phase; this port relies +//! on `auto_install_peers` to fold peer-deps into the regular +//! dependency walk. +//! - **No catalog / hook / lockfile-pinned-version bias.** The +//! resolver is fed each child's manifest range verbatim. Lockfile- +//! driven `preferred_versions` is a follow-up. + +mod resolve_dependency_tree; +mod resolved_tree; + +pub use resolve_dependency_tree::{ + ResolveDependencyTreeError, ResolveDependencyTreeOptions, resolve_dependency_tree, +}; +pub use resolved_tree::{DirectDep, ResolvedPackage, ResolvedTree}; + +#[cfg(test)] +mod tests; diff --git a/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs b/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs new file mode 100644 index 0000000000..8b6c0d6eb0 --- /dev/null +++ b/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs @@ -0,0 +1,219 @@ +use async_recursion::async_recursion; +use derive_more::{Display, Error}; +use futures_util::future; +use miette::Diagnostic; +use pacquet_package_manifest::{DependencyGroup, PackageManifest}; +use pacquet_resolving_resolver_base::{ResolveError, ResolveOptions, Resolver, WantedDependency}; +use pipe_trait::Pipe; +use serde_json::Value; +use tokio::sync::Mutex; + +use crate::resolved_tree::{DirectDep, ResolvedPackage, ResolvedTree}; + +/// Options threaded into [`resolve_dependency_tree`]. +/// +/// Mirrors upstream's per-importer options; pacquet's slice is single- +/// importer so the bag is smaller. `base_opts` is the [`ResolveOptions`] +/// every per-package `resolve()` call sees; the tree walker doesn't +/// mutate it. `auto_install_peers` controls whether a parent's +/// `peerDependencies` are folded into its child set during the walk. +#[derive(Debug)] +pub struct ResolveDependencyTreeOptions { + pub auto_install_peers: bool, + pub base_opts: ResolveOptions, +} + +/// Error envelope returned by the tree walker. +#[derive(Debug, Display, Error, Diagnostic)] +pub enum ResolveDependencyTreeError { + /// One of the resolver chain calls failed (network, parse, etc.). + /// The inner error is the boxed type the resolver returned. + #[display("Failed to resolve dependency: {_0}")] + Resolve(#[error(not(source))] String), + + /// No resolver in the chain claimed the spec. Mirrors pnpm's + /// [`SPEC_NOT_SUPPORTED_BY_ANY_RESOLVER`](https://github.com/pnpm/pnpm/blob/3687b0e180/resolving/default-resolver/src/index.ts#L148-L156) + /// — the chain returning `None` is a contract violation outside + /// `DefaultResolver`'s `??` fall-through, so the tree walker + /// surfaces it instead of silently dropping the edge (which + /// would leave installs missing transitive deps). + #[display("\"{specifier}\" isn't supported by any available resolver.")] + #[diagnostic(code(SPEC_NOT_SUPPORTED_BY_ANY_RESOLVER))] + SpecNotSupported { + #[error(not(source))] + specifier: String, + }, +} + +/// Walk `manifest` plus the entries in `dependency_groups`, dispatch +/// each direct dep through `resolver`, recurse on each picked +/// package's own `dependencies` / `peerDependencies`, and return a +/// flat tree keyed by `name@version`. +/// +/// Mirrors upstream's +/// [`resolveDependencyTree`](https://github.com/pnpm/pnpm/blob/f657b5cb44/installing/deps-resolver/src/resolveDependencyTree.ts#L172-L357) +/// for the npm-shaped slice pacquet currently exposes. +/// +/// Resolves siblings in parallel via `try_join_all` at every level. +/// The per-package dedupe gate is a shared `HashMap` behind a +/// [`tokio::sync::Mutex`]: a sibling that's already resolving an id +/// `X` makes a later visitor see the placeholder, attach the +/// outstanding `DirectDep { id: X }`, and skip the recursion the +/// in-flight task is already running. +pub async fn resolve_dependency_tree( + resolver: &Chain, + manifest: &PackageManifest, + dependency_groups: DependencyGroupList, + opts: ResolveDependencyTreeOptions, +) -> Result +where + DependencyGroupList: IntoIterator, + Chain: Resolver + ?Sized, +{ + let ctx = Ctx { + auto_install_peers: opts.auto_install_peers, + base_opts: opts.base_opts, + packages: Mutex::new(std::collections::HashMap::new()), + policy_violations: Mutex::new(Vec::new()), + }; + + let direct_specs: Vec<(String, String)> = manifest + .dependencies(dependency_groups) + .map(|(name, range)| (name.to_string(), range.to_string())) + .collect(); + + // Capture `&ctx` and `resolver` by reference into each async + // block. The borrow lives for the duration of the `try_join_all` + // await below, so we don't need an `Arc` and the post-await + // `into_inner()` doesn't have to dance around a refcount. + let direct = direct_specs + .into_iter() + .map(|(name, range)| async { + let wanted = WantedDependency { + alias: Some(name), + bare_specifier: Some(range), + ..WantedDependency::default() + }; + resolve_node(&ctx, resolver, wanted).await + }) + .pipe(future::try_join_all) + .await?; + + Ok(ResolvedTree { + direct, + packages: ctx.packages.into_inner(), + policy_violations: ctx.policy_violations.into_inner(), + }) +} + +struct Ctx { + auto_install_peers: bool, + base_opts: ResolveOptions, + packages: Mutex>, + policy_violations: Mutex>, +} + +#[async_recursion] +async fn resolve_node( + ctx: &Ctx, + resolver: &Chain, + wanted: WantedDependency, +) -> Result +where + Chain: Resolver + ?Sized, +{ + let result = resolver + .resolve(&wanted, &ctx.base_opts) + .await + .map_err(|err: ResolveError| ResolveDependencyTreeError::Resolve(err.to_string()))?; + // The resolver returning `None` means the chain declined the + // spec. Outside `DefaultResolver`'s internal `??` fall-through, + // that is a contract violation — surface as + // `SPEC_NOT_SUPPORTED_BY_ANY_RESOLVER` instead of dropping the + // edge, which would leave installs missing dependencies. + let Some(result) = result else { + return Err(ResolveDependencyTreeError::SpecNotSupported { + specifier: render_specifier(&wanted), + }); + }; + + if let Some(violation) = result.policy_violation.clone() { + ctx.policy_violations.lock().await.push(violation); + } + + let id = result.id.to_string(); + let alias = + result.alias.clone().or(wanted.alias.clone()).unwrap_or_else(|| result.id.name.to_string()); + + // Insert a placeholder under the global lock so concurrent + // resolves for the same id collapse to one walker. The first to + // get past the gate populates the children; later visitors return + // a `DirectDep` referencing the (eventually fully populated) id. + { + let mut packages = ctx.packages.lock().await; + if packages.contains_key(&id) { + return Ok(DirectDep { alias, id }); + } + packages.insert( + id.clone(), + ResolvedPackage { id: id.clone(), result: result.clone(), children: Vec::new() }, + ); + } + + let child_specs = extract_children(&result, ctx.auto_install_peers); + let children: Vec = child_specs + .into_iter() + .map(|(child_name, child_range)| { + let child_wanted = WantedDependency { + alias: Some(child_name), + bare_specifier: Some(child_range), + ..WantedDependency::default() + }; + resolve_node(ctx, resolver, child_wanted) + }) + .pipe(future::try_join_all) + .await?; + + ctx.packages.lock().await.get_mut(&id).expect("placeholder inserted above").children = children; + + Ok(DirectDep { alias, id }) +} + +/// Render `{alias}@{bare}` (either half dropped when absent) for the +/// error message. Mirrors upstream's `render_specifier` shape in +/// `default-resolver`. +fn render_specifier(wanted: &WantedDependency) -> String { + let alias = wanted.alias.as_deref().unwrap_or(""); + let bare = wanted.bare_specifier.as_deref().unwrap_or(""); + match (alias.is_empty(), bare.is_empty()) { + (true, true) => String::new(), + (true, false) => bare.to_string(), + (false, true) => alias.to_string(), + (false, false) => format!("{alias}@{bare}"), + } +} + +/// Extract `(name, version_range)` pairs from a resolved package's +/// manifest fragment, filtered by `auto_install_peers` to optionally +/// fold `peerDependencies` into the child set. +fn extract_children( + result: &pacquet_resolving_resolver_base::ResolveResult, + with_peers: bool, +) -> Vec<(String, String)> { + let Some(manifest) = result.manifest.as_ref() else { return Vec::new() }; + let mut out = Vec::new(); + collect_deps(manifest, "dependencies", &mut out); + if with_peers { + collect_deps(manifest, "peerDependencies", &mut out); + } + out +} + +fn collect_deps(manifest: &Value, key: &str, out: &mut Vec<(String, String)>) { + let Some(map) = manifest.get(key).and_then(Value::as_object) else { return }; + for (name, range) in map { + if let Some(range_str) = range.as_str() { + out.push((name.clone(), range_str.to_string())); + } + } +} diff --git a/pacquet/crates/resolving-deps-resolver/src/resolved_tree.rs b/pacquet/crates/resolving-deps-resolver/src/resolved_tree.rs new file mode 100644 index 0000000000..90c4069a2d --- /dev/null +++ b/pacquet/crates/resolving-deps-resolver/src/resolved_tree.rs @@ -0,0 +1,44 @@ +use std::collections::HashMap; + +use pacquet_resolving_resolver_base::{ResolutionPolicyViolation, ResolveResult}; + +/// Output of [`super::resolve_dependency_tree()`]. A flat package map +/// keyed by `name@version` plus the importer's direct entries, so the +/// install pass can traverse the graph without re-resolving and skip +/// duplicates by ID. +/// +/// Mirrors upstream's +/// [`ResolveDependencyTreeResult`](https://github.com/pnpm/pnpm/blob/f657b5cb44/installing/deps-resolver/src/resolveDependencyTree.ts#L151-L170) +/// shape — `direct` carries the project's manifest-level entries, the +/// flat map carries every transitively-resolved package. +#[derive(Debug, Default, Clone)] +pub struct ResolvedTree { + pub direct: Vec, + pub packages: HashMap, + pub policy_violations: Vec, +} + +/// One edge in the resolved tree: the local install name (`alias`) and +/// the resolved package's ID (`name@version`). Same shape for top- +/// level (project manifest) entries and for transitive (parent +/// package) entries. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DirectDep { + /// Local install name in `node_modules`. For an npm-alias entry + /// (`"foo": "npm:bar@^1"`) this is `"foo"`; the resolved + /// package's real name is recoverable from + /// [`ResolvedPackage::result`]. + pub alias: String, + /// `name@version` key into [`ResolvedTree::packages`]. + pub id: String, +} + +/// A single resolved package and its outgoing edges. Mirrors upstream's +/// [`ResolvedPackage`](https://github.com/pnpm/pnpm/blob/f657b5cb44/installing/deps-resolver/src/resolveDependencies.ts#L168-L189) +/// envelope as far as the npm-shaped install path cares. +#[derive(Debug, Clone)] +pub struct ResolvedPackage { + pub id: String, + pub result: ResolveResult, + pub children: Vec, +} diff --git a/pacquet/crates/resolving-deps-resolver/src/tests.rs b/pacquet/crates/resolving-deps-resolver/src/tests.rs new file mode 100644 index 0000000000..5f44eb8291 --- /dev/null +++ b/pacquet/crates/resolving-deps-resolver/src/tests.rs @@ -0,0 +1,221 @@ +use std::{collections::HashMap, str::FromStr, sync::Mutex}; + +use pacquet_package_manifest::{DependencyGroup, PackageManifest}; +use pacquet_resolving_resolver_base::{ + LatestQuery, ResolveError, ResolveFuture, ResolveLatestFuture, ResolveOptions, ResolveResult, + Resolver, WantedDependency, +}; +use pretty_assertions::assert_eq; + +use crate::resolve_dependency_tree::{ + ResolveDependencyTreeError, ResolveDependencyTreeOptions, resolve_dependency_tree, +}; + +/// Stub resolver fed from a `(name, range)` → `ResolveResult` map. +/// Records each `(name, range)` query so tests can assert dedup. +struct StubResolver { + table: HashMap<(String, String), ResolveResult>, + calls: Mutex>, +} + +impl Resolver for StubResolver { + fn resolve<'a>( + &'a self, + wanted: &'a WantedDependency, + _opts: &'a ResolveOptions, + ) -> ResolveFuture<'a> { + let key = ( + wanted.alias.clone().unwrap_or_default(), + wanted.bare_specifier.clone().unwrap_or_default(), + ); + self.calls.lock().unwrap().push(key.clone()); + let result = self.table.get(&key).cloned(); + Box::pin(async move { Ok::<_, ResolveError>(result) }) + } + + fn resolve_latest<'a>( + &'a self, + _query: &'a LatestQuery, + _opts: &'a ResolveOptions, + ) -> ResolveLatestFuture<'a> { + Box::pin(async { Ok(None) }) + } +} + +fn fake_result(name: &str, version: &str, manifest: serde_json::Value) -> ResolveResult { + use pacquet_lockfile::{LockfileResolution, PkgName, PkgNameVer, TarballResolution}; + let id = PkgNameVer::new( + PkgName::parse(name).unwrap(), + node_semver::Version::from_str(version).unwrap(), + ); + ResolveResult { + id, + latest: Some(version.to_string()), + published_at: None, + manifest: Some(manifest), + resolution: LockfileResolution::Tarball(TarballResolution { + tarball: format!("https://registry.example/{name}-{version}.tgz"), + integrity: None, + git_hosted: None, + path: None, + }), + resolved_via: "npm-registry".to_string(), + normalized_bare_specifier: None, + alias: Some(name.to_string()), + policy_violation: None, + } +} + +fn fake_manifest(root_deps: serde_json::Value) -> (tempfile::TempDir, PackageManifest) { + let tmp = tempfile::tempdir().expect("tempdir"); + let path = tmp.path().join("package.json"); + let json = serde_json::json!({ + "name": "root", + "version": "0.0.0", + "dependencies": root_deps, + }); + std::fs::write(&path, serde_json::to_string(&json).unwrap()).expect("write package.json"); + let manifest = PackageManifest::from_path(path).expect("parse package.json"); + (tmp, manifest) +} + +#[tokio::test] +async fn walks_dependencies_and_builds_flat_tree() { + let mut table = HashMap::new(); + table.insert( + ("foo".to_string(), "^1.0.0".to_string()), + fake_result( + "foo", + "1.2.0", + serde_json::json!({ + "name": "foo", + "version": "1.2.0", + "dependencies": { "bar": "^2.0.0" } + }), + ), + ); + table.insert( + ("bar".to_string(), "^2.0.0".to_string()), + fake_result( + "bar", + "2.3.0", + serde_json::json!({ + "name": "bar", + "version": "2.3.0", + }), + ), + ); + let resolver = StubResolver { table, calls: Mutex::new(Vec::new()) }; + let (_tmp, manifest) = fake_manifest(serde_json::json!({ "foo": "^1.0.0" })); + + let tree = resolve_dependency_tree( + &resolver, + &manifest, + [DependencyGroup::Prod], + ResolveDependencyTreeOptions { + auto_install_peers: false, + base_opts: ResolveOptions::default(), + }, + ) + .await + .unwrap(); + + assert_eq!(tree.direct.len(), 1); + assert_eq!(tree.direct[0].alias, "foo"); + assert_eq!(tree.direct[0].id, "foo@1.2.0"); + assert_eq!(tree.packages.len(), 2); + let foo = tree.packages.get("foo@1.2.0").unwrap(); + assert_eq!(foo.children.len(), 1); + assert_eq!(foo.children[0].id, "bar@2.3.0"); + assert!(tree.policy_violations.is_empty()); +} + +#[tokio::test] +async fn dedupes_when_the_same_package_appears_in_two_subtrees() { + let mut table = HashMap::new(); + table.insert( + ("a".to_string(), "^1.0.0".to_string()), + fake_result( + "a", + "1.0.0", + serde_json::json!({ + "name": "a", + "version": "1.0.0", + "dependencies": { "shared": "^1.0.0" } + }), + ), + ); + table.insert( + ("b".to_string(), "^1.0.0".to_string()), + fake_result( + "b", + "1.0.0", + serde_json::json!({ + "name": "b", + "version": "1.0.0", + "dependencies": { "shared": "^1.0.0" } + }), + ), + ); + table.insert( + ("shared".to_string(), "^1.0.0".to_string()), + fake_result( + "shared", + "1.0.0", + serde_json::json!({ + "name": "shared", + "version": "1.0.0", + }), + ), + ); + let resolver = StubResolver { table, calls: Mutex::new(Vec::new()) }; + let (_tmp, manifest) = fake_manifest(serde_json::json!({ "a": "^1.0.0", "b": "^1.0.0" })); + + let tree = resolve_dependency_tree( + &resolver, + &manifest, + [DependencyGroup::Prod], + ResolveDependencyTreeOptions { + auto_install_peers: false, + base_opts: ResolveOptions::default(), + }, + ) + .await + .unwrap(); + + assert_eq!(tree.packages.len(), 3); + assert!(tree.packages.contains_key("a@1.0.0")); + assert!(tree.packages.contains_key("b@1.0.0")); + assert!(tree.packages.contains_key("shared@1.0.0")); +} + +/// A chain that declines every spec (every `resolve()` returns +/// `Ok(None)`) must NOT silently drop the edge — that would leave +/// installs missing transitive deps and report success. The walker +/// surfaces `SpecNotSupported` with the offending specifier +/// rendered the way upstream's `default-resolver` does, so callers +/// can produce the same `ERR_PNPM_SPEC_NOT_SUPPORTED_BY_ANY_RESOLVER` +/// diagnostic the chain dispatcher does. +#[tokio::test] +async fn declined_specifier_surfaces_spec_not_supported_error() { + let resolver = StubResolver { table: HashMap::new(), calls: Mutex::new(Vec::new()) }; + let (_tmp, manifest) = fake_manifest(serde_json::json!({ "foo": "git+ssh://example.com" })); + + let err = resolve_dependency_tree( + &resolver, + &manifest, + [DependencyGroup::Prod], + ResolveDependencyTreeOptions { + auto_install_peers: false, + base_opts: ResolveOptions::default(), + }, + ) + .await + .expect_err("declined spec must error"); + match err { + ResolveDependencyTreeError::SpecNotSupported { specifier } => { + assert_eq!(specifier, "foo@git+ssh://example.com"); + } + other => panic!("expected SpecNotSupported, got {other:?}"), + } +} diff --git a/pacquet/crates/resolving-npm-resolver/src/create_npm_resolution_verifier.rs b/pacquet/crates/resolving-npm-resolver/src/create_npm_resolution_verifier.rs index 8f513b7f23..953ed3a186 100644 --- a/pacquet/crates/resolving-npm-resolver/src/create_npm_resolution_verifier.rs +++ b/pacquet/crates/resolving-npm-resolver/src/create_npm_resolution_verifier.rs @@ -164,7 +164,16 @@ pub fn create_npm_resolution_verifier( let cutoff = if age_check_active { let minutes = opts.minimum_release_age.unwrap_or(0); let now = opts.now.unwrap_or_else(Utc::now); - Some(now - chrono::Duration::minutes(minutes as i64)) + // Checked arithmetic at every step so an absurd `u64` value + // can't wrap on cast, overflow inside `chrono::Duration`, or + // underflow the wall-clock subtraction. None means the cutoff + // couldn't be represented; the verifier degrades to "no age + // check" rather than fabricating a cutoff pointing the wrong + // direction. + i64::try_from(minutes) + .ok() + .and_then(chrono::Duration::try_minutes) + .and_then(|duration| now.checked_sub_signed(duration)) } else { None }; @@ -335,7 +344,7 @@ impl NpmResolutionVerifier { } } } - pick_registry_for_package(&self.registries, &name.to_string()) + pick_registry_for_package(&self.registries, &name.to_string(), None) } async fn run_age_check( diff --git a/pacquet/crates/resolving-npm-resolver/src/lib.rs b/pacquet/crates/resolving-npm-resolver/src/lib.rs index bc003c8b18..e4af347d48 100644 --- a/pacquet/crates/resolving-npm-resolver/src/lib.rs +++ b/pacquet/crates/resolving-npm-resolver/src/lib.rs @@ -1,15 +1,22 @@ -//! Pacquet port of the verifier surface of pnpm's -//! [`@pnpm/resolving.npm-resolver`](https://github.com/pnpm/pnpm/tree/2a9bd897bf/resolving/npm-resolver/src/). +//! Pacquet port of pnpm's +//! [`@pnpm/resolving.npm-resolver`](https://github.com/pnpm/pnpm/tree/f657b5cb44/resolving/npm-resolver/src/). //! -//! Today this crate ports the [`createNpmResolutionVerifier`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/createNpmResolutionVerifier.ts) -//! pipeline: a [`pacquet_resolving_resolver_base::ResolutionVerifier`] -//! that re-applies `minimumReleaseAge` and `trustPolicy='no-downgrade'` -//! to every npm-resolved lockfile entry the install loads. +//! Two surfaces: //! -//! The full upstream package also exposes resolution helpers -//! (`pickPackage`, `parseBareSpecifier`, …) that pacquet doesn't have -//! a use for yet — those land alongside a real resolver when one -//! arrives. +//! - **Resolver.** [`NpmResolver`] implements the +//! [`Resolver`](pacquet_resolving_resolver_base::Resolver) trait. +//! Ports upstream's +//! [`createNpmResolver` → `resolveNpm`](https://github.com/pnpm/pnpm/blob/f657b5cb44/resolving/npm-resolver/src/index.ts#L192-L611): +//! takes a [`WantedDependency`](pacquet_resolving_resolver_base::WantedDependency), +//! runs [`parse_bare_specifier()`], picks a version through +//! [`pick_package()`], and returns the +//! [`ResolveResult`](pacquet_resolving_resolver_base::ResolveResult) +//! the install layer consumes. +//! - **Verifier.** [`create_npm_resolution_verifier()`] is the +//! [`ResolutionVerifier`](pacquet_resolving_resolver_base::ResolutionVerifier) +//! the lockfile-verification gate uses. Re-applies +//! `minimumReleaseAge` and `trustPolicy='no-downgrade'` to every +//! npm-resolved lockfile entry the install loads. mod create_npm_resolution_verifier; mod errors; @@ -19,6 +26,8 @@ mod fetch_full_metadata_cached; mod lookup_context; mod mirror; mod named_registry; +mod npm_resolver; +mod parse_bare_specifier; mod pick_package; mod pick_package_from_meta; mod registry_url; @@ -37,6 +46,8 @@ pub use named_registry::{ BUILTIN_NAMED_REGISTRIES, build_named_registry_prefixes, pick_registry_for_package, pick_registry_for_version, }; +pub use npm_resolver::NpmResolver; +pub use parse_bare_specifier::parse_bare_specifier; pub use pick_package::{ InMemoryPackageMetaCache, MirrorPersistError, PackageMetaCache, PickPackageContext, PickPackageError, PickPackageOptions, PickPackageResult, persist_meta_to_mirror, pick_package, diff --git a/pacquet/crates/resolving-npm-resolver/src/named_registry.rs b/pacquet/crates/resolving-npm-resolver/src/named_registry.rs index 405850c7aa..1547f03c16 100644 --- a/pacquet/crates/resolving-npm-resolver/src/named_registry.rs +++ b/pacquet/crates/resolving-npm-resolver/src/named_registry.rs @@ -90,15 +90,32 @@ pub fn pick_registry_for_version( } } } - pick_registry_for_package(registries, name) + pick_registry_for_package(registries, name, None) } /// Default-vs-scope routing for an npm package. Mirrors pnpm's -/// [`pickRegistryForPackage`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/config/pick-registry-for-package/src/index.ts#L3-L6). -/// `@scope/foo` consults `registries[@scope]`; everything else -/// (including unscoped) falls through to `registries["default"]`. -pub fn pick_registry_for_package(registries: &HashMap, name: &str) -> String { - if let Some(scope) = scope_of(name) +/// [`pickRegistryForPackage`](https://github.com/pnpm/pnpm/blob/main/config/pick-registry-for-package/src/index.ts). +/// +/// Routing rules: +/// +/// 1. **`npm:` alias.** When `bare_specifier` is an `npm:` alias the +/// *alias target* decides routing, not the local key: +/// - `npm:@scope/name@` → `registries[@scope]`. +/// - `npm:name@` (unscoped target) → `registries["default"]`, +/// never the local alias's scope, because the fetched package is +/// unscoped and doesn't live on a scoped registry. +/// 2. **Plain spec.** Falls back to `pkg_name`'s scope when present; +/// otherwise `registries["default"]`. +pub fn pick_registry_for_package( + registries: &HashMap, + pkg_name: &str, + bare_specifier: Option<&str>, +) -> String { + let scope = match bare_specifier.and_then(|spec| spec.strip_prefix("npm:")) { + Some(target) => scope_of(target), + None => scope_of(pkg_name), + }; + if let Some(scope) = scope && let Some(url) = registries.get(scope) { return url.clone(); diff --git a/pacquet/crates/resolving-npm-resolver/src/named_registry/tests.rs b/pacquet/crates/resolving-npm-resolver/src/named_registry/tests.rs index 8e83b8782f..7268fbcd7f 100644 --- a/pacquet/crates/resolving-npm-resolver/src/named_registry/tests.rs +++ b/pacquet/crates/resolving-npm-resolver/src/named_registry/tests.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use pretty_assertions::assert_eq; -use super::{build_named_registry_prefixes, pick_registry_for_version}; +use super::{build_named_registry_prefixes, pick_registry_for_package, pick_registry_for_version}; fn registries(entries: &[(&str, &str)]) -> HashMap { entries.iter().map(|(k, v)| ((*k).to_string(), (*v).to_string())).collect() @@ -76,6 +76,67 @@ fn falls_back_to_scope_routing_without_tarball() { assert_eq!(bare, "https://registry.npmjs.org/"); } +/// `pick_registry_for_package` consults the `npm:@scope/...` form of +/// `bare_specifier` for the scope override before falling back to the +/// scope of the local package name. Without this, an npm-alias entry +/// like `"foo": "npm:@acme/bar@^1"` would route through the empty +/// scope of `foo` and miss the user's `registries[@acme]` override. +#[test] +fn npm_alias_uses_bare_specifier_scope_over_local_name() { + let regs = registries(&[ + ("default", "https://registry.npmjs.org/"), + ("@acme", "https://npm.acme.example/"), + ]); + let picked = pick_registry_for_package(®s, "foo", Some("npm:@acme/bar@^1")); + assert_eq!(picked, "https://npm.acme.example/"); +} + +/// When no `npm:` prefix is in play, the local package name's scope +/// still drives routing (preserves the legacy behavior for plain +/// `"@scope/foo": "^1"` manifest entries). +#[test] +fn falls_back_to_pkg_name_scope_without_npm_alias() { + let regs = registries(&[ + ("default", "https://registry.npmjs.org/"), + ("@private", "https://internal/registry/"), + ]); + let picked = pick_registry_for_package(®s, "@private/foo", Some("^1.0.0")); + assert_eq!(picked, "https://internal/registry/"); +} + +/// Scoped local name + scoped `npm:` target in a **different scope**: +/// the target's scope wins. The package being fetched is +/// `@scope2/bar`, so routing follows `@scope2`, not the local +/// `@scope1/` slot. Mirrors upstream's `'npm:@private/lodash@1'` +/// case in `config/pick-registry-for-package/test/index.spec.ts`. +#[test] +fn scoped_npm_alias_target_in_different_scope_wins_over_local() { + let regs = registries(&[ + ("default", "https://registry.npmjs.org/"), + ("@scope1", "https://scope1.registry/"), + ("@scope2", "https://scope2.registry/"), + ]); + let picked = pick_registry_for_package(®s, "@scope1/foo", Some("npm:@scope2/bar@^1.0.0")); + assert_eq!(picked, "https://scope2.registry/"); +} + +/// An unscoped `npm:` alias target (`"@private/foo": "npm:lodash@^1"`) +/// routes through the **default** registry, not the local alias's +/// scope. The fetched package is `lodash` (unscoped); the local +/// `@private/` slot is just where the install lands in +/// `node_modules`, and `lodash` doesn't live on a scoped registry. +/// Mirrors the upstream fix in +/// `config/pick-registry-for-package`. +#[test] +fn unscoped_npm_alias_target_routes_to_default() { + let regs = registries(&[ + ("default", "https://registry.npmjs.org/"), + ("@private", "https://internal/registry/"), + ]); + let picked = pick_registry_for_package(®s, "@private/foo", Some("npm:lodash@^1")); + assert_eq!(picked, "https://registry.npmjs.org/"); +} + /// A tarball URL that's *almost* a prefix match — same host, but /// without the trailing slash on the prefix — must not silently /// route. The trailing-slash on every built prefix is what makes diff --git a/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs b/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs new file mode 100644 index 0000000000..27edb30226 --- /dev/null +++ b/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs @@ -0,0 +1,328 @@ +//! Pacquet port of pnpm's npm-registry resolver. Wraps +//! [`parse_bare_specifier`](crate::parse_bare_specifier()) plus +//! [`pick_package`](crate::pick_package()) behind the chain-friendly +//! [`Resolver`] trait so the default-resolver dispatcher can dispatch +//! npm-shaped dependencies through it. +//! +//! Mirrors upstream's +//! [`createNpmResolver` → `resolveNpm`](https://github.com/pnpm/pnpm/blob/f657b5cb44/resolving/npm-resolver/src/index.ts#L192-L611) +//! pair: the struct owns the registry config + network handles + meta +//! cache; the trait implementation parses the bare specifier, picks a +//! version, and maps the result to [`ResolveResult`]. +//! +//! Out of scope for this port: +//! +//! - **Workspace resolution.** `workspace:` specs return `Ok(None)` so +//! the dispatcher falls through to the workspace resolver when that +//! crate lands. +//! - **`peekManifestFromStore` fast path.** Upstream short-circuits a +//! registry fetch when the lockfile-pinned tarball is already in the +//! store. Pacquet today goes through the picker unconditionally; +//! restoring the fast path is a separate item. +//! - **Trust-policy enforcement.** The resolver-side +//! `failIfTrustDowngraded` call is wired through the verifier crate +//! only; the resolver path doesn't enforce it yet. + +use std::{collections::HashMap, path::PathBuf, sync::Arc}; + +use chrono::{DateTime, Utc}; +use pacquet_config::version_policy::PackageVersionPolicy; +use pacquet_lockfile::{LockfileResolution, PkgName, PkgNameVer, TarballResolution}; +use pacquet_network::{AuthHeaders, ThrottledClient}; +use pacquet_registry::{Package, PackageVersion}; +use pacquet_resolving_resolver_base::{ + LatestInfo, LatestQuery, ResolutionPolicyViolation, ResolveError, ResolveFuture, + ResolveLatestFuture, ResolveOptions, ResolveResult, Resolver, UpdateBehavior, WantedDependency, +}; + +use crate::{ + named_registry::pick_registry_for_package, + parse_bare_specifier::parse_bare_specifier, + pick_package::{PackageMetaCache, PickPackageContext, PickPackageOptions, pick_package}, + pick_package_from_meta::{RegistryPackageSpec, RegistryPackageSpecType}, + violation_codes::MINIMUM_RELEASE_AGE_VIOLATION_CODE, +}; + +/// npm-registry resolver. +/// +/// One instance per install. Mirrors upstream's +/// [`createNpmResolver`](https://github.com/pnpm/pnpm/blob/f657b5cb44/resolving/npm-resolver/src/index.ts#L192-L289) +/// factory return value: registries map, named-registry overrides, +/// throttled HTTP client, auth-header table, on-disk metadata mirror +/// root, and the install-shared metadata cache the picker reads +/// through. +pub struct NpmResolver { + /// `default` plus per-scope (`@scope`) entries. The keys mirror + /// pnpm's `Registries` shape; the picker consults the `default` + /// entry as the install-wide default and the scope entry when the + /// resolved package name carries one. Pacquet today only populates + /// `default` — per-scope wiring lands when `.npmrc`'s + /// `:registry` parsing does. + pub registries: HashMap, + /// User-supplied named-registry aliases (e.g. `gh:` → + /// `https://npm.pkg.github.com/`). Merged with + /// [`crate::BUILTIN_NAMED_REGISTRIES`] at construction. Today + /// only consulted by the named-registry resolver (out of scope + /// for this port); kept here so the install layer can build one + /// resolver instance with the full registry view. + pub named_registries: HashMap, + pub http_client: Arc, + pub auth_headers: Arc, + pub meta_cache: Arc, + /// Root of the on-disk metadata mirror. `None` disables every + /// disk read/write — the picker goes straight to the network on + /// each cache miss. + pub cache_dir: Option, + pub offline: bool, + pub prefer_offline: bool, + pub ignore_missing_time_field: bool, +} + +impl Resolver for NpmResolver { + fn resolve<'a>( + &'a self, + wanted_dependency: &'a WantedDependency, + opts: &'a ResolveOptions, + ) -> ResolveFuture<'a> { + Box::pin(self.resolve_impl(wanted_dependency, opts)) + } + + fn resolve_latest<'a>( + &'a self, + query: &'a LatestQuery, + opts: &'a ResolveOptions, + ) -> ResolveLatestFuture<'a> { + Box::pin(self.resolve_latest_impl(query, opts)) + } +} + +impl NpmResolver { + async fn resolve_impl( + &self, + wanted_dependency: &WantedDependency, + opts: &ResolveOptions, + ) -> Result, ResolveError> { + let default_tag = opts.default_tag.as_deref().unwrap_or("latest"); + + // `workspace:` is owned by the workspace resolver. Decline so + // the chain dispatches there once that crate lands. + if wanted_dependency + .bare_specifier + .as_deref() + .is_some_and(|bare| bare.starts_with("workspace:")) + { + return Ok(None); + } + + // Pick registry from `(alias, bare_specifier)` so an npm-alias + // entry like `"foo": "npm:@scope/bar@^1"` routes through + // `registries[@scope]` instead of the alias's own scope. + // Mirrors upstream's + // [`pickRegistryForPackage`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/config/pick-registry-for-package/src/index.ts). + let registry = pick_registry_for_package( + &self.registries, + wanted_dependency.alias.as_deref().unwrap_or_default(), + wanted_dependency.bare_specifier.as_deref(), + ); + + let spec = match wanted_dependency.bare_specifier.as_deref() { + Some(bare) => { + match parse_bare_specifier( + bare, + wanted_dependency.alias.as_deref(), + default_tag, + ®istry, + ) { + Some(spec) => spec, + None => return Ok(None), + } + } + None => match wanted_dependency.alias.as_deref() { + Some(alias) if !alias.is_empty() => default_tag_spec(alias, default_tag), + _ => return Ok(None), + }, + }; + + let pick_opts = PickPackageOptions { + registry: ®istry, + preferred_version_selectors: opts.preferred_versions.get(&spec.name), + published_by: opts.published_by, + published_by_exclude: opts.published_by_exclude.as_ref(), + pick_lowest_version: opts.pick_lowest_version, + include_latest_tag: opts.update == UpdateBehavior::Latest, + dry_run: opts.dry_run, + }; + + let ctx = PickPackageContext { + http_client: &self.http_client, + auth_headers: &self.auth_headers, + meta_cache: self.meta_cache.as_ref(), + cache_dir: self.cache_dir.as_deref(), + offline: self.offline, + prefer_offline: self.prefer_offline, + ignore_missing_time_field: self.ignore_missing_time_field, + }; + + let pick_result = pick_package(&ctx, &spec, &pick_opts) + .await + .map_err(|err| Box::new(err) as ResolveError)?; + + let Some(picked) = pick_result.picked_package else { + return Ok(None); + }; + + let result = build_resolve_result( + &pick_result.meta, + &picked, + &spec, + wanted_dependency.alias.as_deref(), + opts.published_by, + opts.published_by_exclude.as_ref(), + )?; + + Ok(Some(result)) + } + + /// Latest-version companion. Mirrors upstream's + /// [`createResolveLatest`](https://github.com/pnpm/pnpm/blob/f657b5cb44/resolving/npm-resolver/src/index.ts#L323-L353) + /// closure: feed `wanted.bareSpecifier ?? 'latest'` plus + /// `update: 'latest'` (or the original opts under `compatible`) back + /// through `resolve`, then return the picked manifest. + async fn resolve_latest_impl( + &self, + query: &LatestQuery, + opts: &ResolveOptions, + ) -> Result, ResolveError> { + // Mirror upstream's `createResolveLatest`: only the + // `bare_specifier` is rewritten (synthesized to the default + // tag when missing). Cloning the rest of the wanted + // dependency preserves `injected` / `prev_specifier` / + // `optional`, which downstream resolver branches may yet + // consult even though the npm resolver doesn't today. + let mut wanted = query.wanted_dependency.clone(); + if wanted.bare_specifier.is_none() { + wanted.bare_specifier = Some("latest".to_string()); + } + let mut resolve_opts = opts.clone(); + if !query.compatible { + resolve_opts.update = UpdateBehavior::Latest; + } + let result = self.resolve_impl(&wanted, &resolve_opts).await?; + let Some(result) = result else { + return Ok(None); + }; + if result + .policy_violation + .as_ref() + .is_some_and(|violation| violation.code == MINIMUM_RELEASE_AGE_VIOLATION_CODE) + { + return Ok(Some(LatestInfo { latest_manifest: None })); + } + Ok(Some(LatestInfo { latest_manifest: result.manifest })) + } +} + +/// `bare_specifier` is absent but `alias` is present: synthesize a tag +/// spec pointing at the default tag, mirroring upstream's +/// [`defaultTagForAlias`](https://github.com/pnpm/pnpm/blob/f657b5cb44/resolving/npm-resolver/src/index.ts#L1000-L1006). +fn default_tag_spec(alias: &str, default_tag: &str) -> RegistryPackageSpec { + RegistryPackageSpec { + name: alias.to_string(), + fetch_spec: default_tag.to_string(), + spec_type: RegistryPackageSpecType::Tag, + normalized_bare_specifier: None, + } +} + +fn build_resolve_result( + meta: &Package, + picked: &PackageVersion, + spec: &RegistryPackageSpec, + alias: Option<&str>, + published_by: Option>, + published_by_exclude: Option<&PackageVersionPolicy>, +) -> Result { + let pkg_name = + PkgName::parse(picked.name.as_str()).map_err(|err| Box::new(err) as ResolveError)?; + let id = PkgNameVer::new(pkg_name.clone(), picked.version.clone()); + // The picker always carries a tarball URL on its `dist` payload — + // every npm registry serves `dist.tarball` on a successful pick + // and pacquet's deserializer requires it (`dist.tarball: String`, + // not `Option`). Always emit `Tarball`, never `Registry`. The + // install side's `extract_tarball` only handles `Tarball`, so + // mixing the two shapes would force a Registry → URL + // reconstruction with no payoff: at resolve time we already have + // the URL the install path needs. + let resolution = LockfileResolution::Tarball(TarballResolution { + tarball: picked.dist.tarball.clone(), + integrity: picked.dist.integrity.clone(), + git_hosted: None, + path: None, + }); + let published_at = meta.published_at(&picked.version.to_string()).map(str::to_string); + let manifest = Some(serde_json::to_value(picked).map_err(|err| Box::new(err) as ResolveError)?); + let policy_violation = detect_min_release_age_violation( + &pkg_name, + &picked.version.to_string(), + published_at.as_deref(), + &resolution, + published_by, + published_by_exclude, + ); + Ok(ResolveResult { + id, + latest: meta.dist_tag("latest").map(str::to_string), + published_at, + manifest, + resolution, + resolved_via: "npm-registry".to_string(), + normalized_bare_specifier: spec.normalized_bare_specifier.clone(), + alias: alias.map(str::to_string), + policy_violation, + }) +} + +/// Resolver-time `minimumReleaseAge` check. Returns a violation entry +/// when the picked version's publish timestamp falls past the policy +/// cutoff and isn't excluded by name/version. Mirrors upstream's +/// [`detectMinReleaseAgeViolation`](https://github.com/pnpm/pnpm/blob/f657b5cb44/resolving/npm-resolver/src/index.ts#L1023-L1044). +fn detect_min_release_age_violation( + name: &PkgName, + version: &str, + published_at: Option<&str>, + resolution: &LockfileResolution, + published_by: Option>, + published_by_exclude: Option<&PackageVersionPolicy>, +) -> Option { + let cutoff = published_by?; + let timestamp = published_at?; + if let Some(policy) = published_by_exclude { + use pacquet_config::version_policy::PolicyMatch; + match policy.matches(&name.to_string()) { + PolicyMatch::AnyVersion => return None, + PolicyMatch::ExactVersions(versions) + if versions.iter().any(|exact| exact == version) => + { + return None; + } + _ => {} + } + } + let parsed = DateTime::parse_from_rfc3339(timestamp).ok()?.with_timezone(&Utc); + if parsed <= cutoff { + return None; + } + Some(ResolutionPolicyViolation { + name: name.clone(), + version: version.to_string(), + resolution: resolution.clone(), + code: MINIMUM_RELEASE_AGE_VIOLATION_CODE, + reason: format!( + "was published at {timestamp}, within the minimumReleaseAge cutoff ({cutoff})", + cutoff = cutoff.to_rfc3339_opts(chrono::SecondsFormat::Millis, true), + ), + }) +} + +#[cfg(test)] +mod tests; diff --git a/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs b/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs new file mode 100644 index 0000000000..bc6f799e4a --- /dev/null +++ b/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs @@ -0,0 +1,186 @@ +use std::{collections::HashMap, sync::Arc}; + +use chrono::TimeZone; +use pacquet_lockfile::LockfileResolution; +use pacquet_network::{AuthHeaders, ThrottledClient}; +use pacquet_resolving_resolver_base::{ + LatestQuery, ResolveOptions, Resolver, UpdateBehavior, WantedDependency, +}; +use pretty_assertions::assert_eq; +use tempfile::TempDir; + +use crate::{ + npm_resolver::NpmResolver, pick_package::InMemoryPackageMetaCache, + violation_codes::MINIMUM_RELEASE_AGE_VIOLATION_CODE, +}; + +const PACKAGE_BODY: &str = r#"{ + "name": "acme", + "dist-tags": { "latest": "1.1.0" }, + "modified": "2025-01-15T12:00:00.000Z", + "time": { + "1.0.0": "2024-01-10T08:30:00.000Z", + "1.1.0": "2024-12-10T08:30:00.000Z" + }, + "versions": { + "1.0.0": { + "name": "acme", + "version": "1.0.0", + "dist": { + "integrity": "sha512-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==", + "shasum": "0000000000000000000000000000000000000000", + "tarball": "https://registry/acme-1.0.0.tgz" + } + }, + "1.1.0": { + "name": "acme", + "version": "1.1.0", + "dist": { + "integrity": "sha512-BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB==", + "shasum": "1111111111111111111111111111111111111111", + "tarball": "https://registry/acme-1.1.0.tgz" + } + } + } +}"#; + +fn build_resolver(registry: &str) -> (NpmResolver, TempDir) { + let cache_dir = TempDir::new().expect("tempdir"); + let mut registries = HashMap::new(); + registries.insert("default".to_string(), registry.to_string()); + let resolver = NpmResolver { + registries, + named_registries: HashMap::new(), + http_client: Arc::new(ThrottledClient::default()), + auth_headers: Arc::new(AuthHeaders::default()), + meta_cache: Arc::new(InMemoryPackageMetaCache::default()), + cache_dir: Some(cache_dir.path().to_path_buf()), + offline: false, + prefer_offline: false, + ignore_missing_time_field: false, + }; + (resolver, cache_dir) +} + +#[tokio::test] +async fn range_specifier_picks_max_in_range() { + let mut server = mockito::Server::new_async().await; + let _mock = + server.mock("GET", "/acme").with_status(200).with_body(PACKAGE_BODY).create_async().await; + let registry = format!("{}/", server.url()); + let (resolver, _tempdir) = build_resolver(®istry); + + let wanted = WantedDependency { + alias: Some("acme".to_string()), + bare_specifier: Some("^1.0.0".to_string()), + ..WantedDependency::default() + }; + let result = resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().unwrap(); + assert_eq!(result.id.name.to_string(), "acme"); + assert_eq!(result.id.suffix.to_string(), "1.1.0"); + assert_eq!(result.latest.as_deref(), Some("1.1.0")); + assert_eq!(result.resolved_via, "npm-registry"); + assert_eq!(result.alias.as_deref(), Some("acme")); + assert!(result.policy_violation.is_none()); + assert!(matches!(result.resolution, LockfileResolution::Tarball(_))); +} + +#[tokio::test] +async fn workspace_specifier_returns_none_for_chain_fallthrough() { + let server = mockito::Server::new_async().await; + let registry = format!("{}/", server.url()); + let (resolver, _tempdir) = build_resolver(®istry); + + let wanted = WantedDependency { + alias: Some("acme".to_string()), + bare_specifier: Some("workspace:*".to_string()), + ..WantedDependency::default() + }; + let result = resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap(); + assert!(result.is_none()); +} + +#[tokio::test] +async fn missing_bare_specifier_synthesizes_default_tag_query() { + let mut server = mockito::Server::new_async().await; + let _mock = + server.mock("GET", "/acme").with_status(200).with_body(PACKAGE_BODY).create_async().await; + let registry = format!("{}/", server.url()); + let (resolver, _tempdir) = build_resolver(®istry); + + let wanted = + WantedDependency { alias: Some("acme".to_string()), ..WantedDependency::default() }; + let result = resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().unwrap(); + assert_eq!(result.id.suffix.to_string(), "1.1.0"); +} + +#[tokio::test] +async fn surfaces_min_release_age_violation_inline() { + let mut server = mockito::Server::new_async().await; + let _mock = + server.mock("GET", "/acme").with_status(200).with_body(PACKAGE_BODY).create_async().await; + let registry = format!("{}/", server.url()); + let (resolver, _tempdir) = build_resolver(®istry); + + // Cutoff sits between 1.0.0 (2024-01-10) and 1.1.0 (2024-12-10): + // the picker should fall back to 1.0.0 as the highest mature + // version and the picked result should *not* trip a violation. + // To force a violation we set the cutoff before both versions. + let published_by = Some(chrono::Utc.with_ymd_and_hms(2023, 12, 1, 0, 0, 0).unwrap()); + let opts = ResolveOptions { published_by, ..ResolveOptions::default() }; + let wanted = WantedDependency { + alias: Some("acme".to_string()), + bare_specifier: Some("^1.0.0".to_string()), + ..WantedDependency::default() + }; + let result = resolver.resolve(&wanted, &opts).await.unwrap().unwrap(); + let violation = result.policy_violation.expect("violation surfaced"); + assert_eq!(violation.code, MINIMUM_RELEASE_AGE_VIOLATION_CODE); +} + +#[tokio::test] +async fn resolve_latest_returns_picked_manifest() { + let mut server = mockito::Server::new_async().await; + let _mock = + server.mock("GET", "/acme").with_status(200).with_body(PACKAGE_BODY).create_async().await; + let registry = format!("{}/", server.url()); + let (resolver, _tempdir) = build_resolver(®istry); + + let query = LatestQuery { + wanted_dependency: WantedDependency { + alias: Some("acme".to_string()), + bare_specifier: Some("^1.0.0".to_string()), + ..WantedDependency::default() + }, + compatible: false, + }; + let info = resolver + .resolve_latest(&query, &ResolveOptions::default()) + .await + .unwrap() + .expect("latest info"); + let manifest = info.latest_manifest.expect("manifest present"); + assert_eq!(manifest["version"].as_str(), Some("1.1.0")); +} + +#[tokio::test] +async fn resolve_latest_under_compatible_does_not_override_update_to_latest() { + let mut server = mockito::Server::new_async().await; + let _mock = + server.mock("GET", "/acme").with_status(200).with_body(PACKAGE_BODY).create_async().await; + let registry = format!("{}/", server.url()); + let (resolver, _tempdir) = build_resolver(®istry); + + let query = LatestQuery { + wanted_dependency: WantedDependency { + alias: Some("acme".to_string()), + bare_specifier: Some("^1.0.0".to_string()), + ..WantedDependency::default() + }, + compatible: true, + }; + let opts = ResolveOptions { update: UpdateBehavior::Off, ..ResolveOptions::default() }; + let info = resolver.resolve_latest(&query, &opts).await.unwrap().expect("latest info"); + let manifest = info.latest_manifest.expect("manifest present"); + assert_eq!(manifest["version"].as_str(), Some("1.1.0")); +} diff --git a/pacquet/crates/resolving-npm-resolver/src/parse_bare_specifier.rs b/pacquet/crates/resolving-npm-resolver/src/parse_bare_specifier.rs new file mode 100644 index 0000000000..856d1ab4ad --- /dev/null +++ b/pacquet/crates/resolving-npm-resolver/src/parse_bare_specifier.rs @@ -0,0 +1,205 @@ +//! Pacquet port of pnpm's +//! [`parseBareSpecifier`](https://github.com/pnpm/pnpm/blob/f657b5cb44/resolving/npm-resolver/src/parseBareSpecifier.ts). +//! +//! Routes a raw bare specifier (`"^1.0.0"`, `"latest"`, +//! `"npm:lodash@^4"`, `"https://registry.npmjs.org/foo/-/foo-1.0.0.tgz"`) +//! to a [`RegistryPackageSpec`] the npm picker can consume, or `None` +//! when no npm-shaped interpretation applies — that's the signal to the +//! resolver chain to try the next resolver in the chain. + +use node_semver::{Range, Version}; +use reqwest::Url; + +use crate::pick_package_from_meta::{RegistryPackageSpec, RegistryPackageSpecType}; + +/// Discriminator + normalized form produced by [`get_version_selector_type`]. +struct VersionSelectorMatch { + spec_type: RegistryPackageSpecType, + normalized: String, +} + +/// Parse an npm-style `(bare_specifier, alias, default_tag, registry)` +/// into a [`RegistryPackageSpec`]. +/// +/// Returns `None` for any specifier the npm resolver doesn't claim +/// (git URLs, workspace protocol, catalog protocol, etc.), so the +/// resolver chain falls through to the next entry. +pub fn parse_bare_specifier( + bare_specifier: &str, + alias: Option<&str>, + default_tag: &str, + registry: &str, +) -> Option { + let mut name: Option = alias.map(str::to_string); + let mut bare = bare_specifier.to_string(); + + if let Some(rest) = bare.strip_prefix("npm:") { + bare = rest.to_string(); + + let alias_str = alias; + // `npm:` paired with a non-empty alias keeps + // the alias as the package name, mirroring the named-registry + // shape (`gh:^1.0.0`). Restricted to semver ranges/versions so + // unscoped names like `npm:is-positive` keep their npm package- + // aliasing meaning instead of being read as a tag. + if let Some(a) = alias_str + && !a.is_empty() + && Range::parse(&bare).is_ok() + { + name = Some(a.to_string()); + } else { + // Last `@` discriminates `name@version`. `index < 1` covers + // both no-`@` (`npm:foo`) and leading-`@` (`npm:@scope/foo`, + // no version) cases — both fall back to the default tag. + let last_at = + bare.bytes().enumerate().rev().find_map(|(i, b)| (b == b'@').then_some(i)); + match last_at { + Some(idx) if idx >= 1 => { + name = Some(bare[..idx].to_string()); + bare = bare[idx + 1..].to_string(); + } + _ => { + name = Some(bare.clone()); + bare = default_tag.to_string(); + } + } + } + } + + if let Some(name) = name.as_ref() + && !name.is_empty() + && let Some(selector) = get_version_selector_type(&bare) + { + return Some(RegistryPackageSpec { + name: name.clone(), + fetch_spec: selector.normalized, + spec_type: selector.spec_type, + normalized_bare_specifier: None, + }); + } + + if bare.starts_with(registry) + && let Some(pkg) = parse_npm_tarball_url(&bare) + { + return Some(RegistryPackageSpec { + name: pkg.name, + fetch_spec: pkg.version, + spec_type: RegistryPackageSpecType::Version, + normalized_bare_specifier: Some(bare), + }); + } + + None +} + +/// Discriminate between an exact version, a semver range, and a +/// dist-tag, returning the normalized form alongside the discriminator. +/// Mirrors npm's +/// [`version-selector-type`](https://github.com/pnpm/version-selector-type/blob/v3.0.0/index.js): +/// version first, range second, tag last. Returns `None` only when the +/// selector contains characters that JS's `encodeURIComponent` would +/// escape (i.e. not a valid npm tag). +fn get_version_selector_type(selector: &str) -> Option { + if let Ok(version) = Version::parse(selector) { + return Some(VersionSelectorMatch { + spec_type: RegistryPackageSpecType::Version, + normalized: version.to_string(), + }); + } + if Range::parse(selector).is_ok() { + return Some(VersionSelectorMatch { + spec_type: RegistryPackageSpecType::Range, + normalized: selector.to_string(), + }); + } + if is_valid_dist_tag(selector) { + return Some(VersionSelectorMatch { + spec_type: RegistryPackageSpecType::Tag, + normalized: selector.to_string(), + }); + } + None +} + +/// Mirrors JS's `encodeURIComponent(s) === s` check upstream uses to +/// reject anything not safe to embed in a URL segment. The unreserved +/// set is `A-Z a-z 0-9 - _ . ! ~ * ' ( )` — anything else (including +/// `/`, `:`, spaces) bumps the candidate out of the tag bucket so +/// protocol-prefixed specifiers fall through to the next resolver. +fn is_valid_dist_tag(selector: &str) -> bool { + selector.bytes().all(|byte| { + matches!(byte, + b'A'..=b'Z' + | b'a'..=b'z' + | b'0'..=b'9' + | b'-' | b'_' | b'.' | b'!' | b'~' | b'*' | b'\'' | b'(' | b')') + }) +} + +struct NpmTarballUrl { + name: String, + version: String, +} + +/// Pacquet port of npm's +/// [`parse-npm-tarball-url`](https://github.com/zkochan/packages/blob/main/parse-npm-tarball-url/src/index.ts). +/// Extracts `(name, version)` from a URL like +/// `https://registry.npmjs.org/foo/-/foo-1.0.0.tgz`. Returns `None` +/// when the URL doesn't fit the npm tarball layout or the trailing +/// version segment isn't valid semver. +fn parse_npm_tarball_url(url: &str) -> Option { + let parsed = Url::parse(url).ok()?; + parsed.host_str()?; + let path = parsed.path(); + if path.is_empty() { + return None; + } + let parts: Vec<&str> = path.split("/-/").collect(); + if parts.len() != 2 { + return None; + } + let raw_name = parts[0].strip_prefix('/').unwrap_or(parts[0]); + if raw_name.is_empty() { + return None; + } + let name = percent_decode_str(raw_name); + if name.is_empty() { + return None; + } + let path_with_no_ext = parts[1].strip_suffix(".tgz").unwrap_or(parts[1]); + // The tarball filename always starts with the scopeless name + // followed by `-`. Anchor on that prefix instead of slicing by + // length so a registry that returns `foo/-/bar-1.0.0.tgz` (name + // mismatch) doesn't get accepted and mapped to the wrong package. + let scopeless_name = name.rsplit('/').next().unwrap_or(name.as_str()); + let version = + path_with_no_ext.strip_prefix(scopeless_name).and_then(|rest| rest.strip_prefix('-'))?; + Version::parse(version).ok()?; + Some(NpmTarballUrl { name, version: version.to_string() }) +} + +/// Percent-decode a URL path segment. Matches JS's `decodeURIComponent` +/// for the byte ranges that show up in npm tarball URLs (the only +/// caller). Invalid escapes pass through unchanged, mirroring the +/// `percent_decode_str` helper in `pacquet-network`'s proxy module. +fn percent_decode_str(text: &str) -> String { + let bytes = text.as_bytes(); + let mut out = Vec::with_capacity(bytes.len()); + let mut idx = 0; + while idx < bytes.len() { + if bytes[idx] == b'%' && idx + 2 < bytes.len() { + let hex = std::str::from_utf8(&bytes[idx + 1..idx + 3]).ok(); + if let Some(byte) = hex.and_then(|hex_digits| u8::from_str_radix(hex_digits, 16).ok()) { + out.push(byte); + idx += 3; + continue; + } + } + out.push(bytes[idx]); + idx += 1; + } + String::from_utf8_lossy(&out).into_owned() +} + +#[cfg(test)] +mod tests; diff --git a/pacquet/crates/resolving-npm-resolver/src/parse_bare_specifier/tests.rs b/pacquet/crates/resolving-npm-resolver/src/parse_bare_specifier/tests.rs new file mode 100644 index 0000000000..cff8e56b73 --- /dev/null +++ b/pacquet/crates/resolving-npm-resolver/src/parse_bare_specifier/tests.rs @@ -0,0 +1,151 @@ +use crate::{ + parse_bare_specifier::parse_bare_specifier, pick_package_from_meta::RegistryPackageSpecType, +}; + +const DEFAULT_TAG: &str = "latest"; +const REGISTRY: &str = "https://registry.npmjs.org/"; + +#[test] +fn version_selector_classified_as_version() { + let spec = parse_bare_specifier("1.0.0", Some("foo"), DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "foo"); + assert_eq!(spec.fetch_spec, "1.0.0"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Version); +} + +#[test] +fn range_selector_classified_as_range() { + let spec = parse_bare_specifier("^1.0.0", Some("foo"), DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "foo"); + assert_eq!(spec.fetch_spec, "^1.0.0"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Range); +} + +#[test] +fn tag_selector_classified_as_tag() { + let spec = parse_bare_specifier("latest", Some("foo"), DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "foo"); + assert_eq!(spec.fetch_spec, "latest"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Tag); +} + +#[test] +fn no_alias_no_npm_prefix_declines() { + assert!(parse_bare_specifier("^1.0.0", None, DEFAULT_TAG, REGISTRY).is_none()); +} + +#[test] +fn npm_alias_with_range_uses_outer_alias_as_name() { + let spec = + parse_bare_specifier("npm:^1.0.0", Some("is-positive"), DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "is-positive"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Range); +} + +#[test] +fn npm_alias_with_exact_version_uses_outer_alias_as_name() { + let spec = parse_bare_specifier("npm:1.0.0", Some("@acme/foo"), DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "@acme/foo"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Version); + assert_eq!(spec.fetch_spec, "1.0.0"); +} + +#[test] +fn npm_alias_with_inner_name_and_range() { + let spec = + parse_bare_specifier("npm:lodash@^4.0.0", Some("foo"), DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "lodash"); + assert_eq!(spec.fetch_spec, "^4.0.0"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Range); +} + +#[test] +fn npm_alias_with_inner_scoped_name_and_range() { + let spec = + parse_bare_specifier("npm:@scope/foo@^1.0.0", Some("foo"), DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "@scope/foo"); + assert_eq!(spec.fetch_spec, "^1.0.0"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Range); +} + +#[test] +fn npm_alias_unversioned_falls_back_to_default_tag() { + let spec = parse_bare_specifier("npm:is-positive", None, DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "is-positive"); + assert_eq!(spec.fetch_spec, "latest"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Tag); +} + +#[test] +fn npm_alias_scoped_unversioned_falls_back_to_default_tag() { + let spec = parse_bare_specifier("npm:@scope/foo", None, DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "@scope/foo"); + assert_eq!(spec.fetch_spec, "latest"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Tag); +} + +#[test] +fn tarball_url_under_registry_is_parsed() { + let url = "https://registry.npmjs.org/foo/-/foo-1.0.0.tgz"; + let spec = parse_bare_specifier(url, None, DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "foo"); + assert_eq!(spec.fetch_spec, "1.0.0"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Version); + assert_eq!(spec.normalized_bare_specifier.as_deref(), Some(url)); +} + +#[test] +fn tarball_url_for_scoped_package_decodes_path() { + let url = "https://registry.npmjs.org/@scope/foo/-/foo-1.0.0.tgz"; + let spec = parse_bare_specifier(url, None, DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "@scope/foo"); + assert_eq!(spec.fetch_spec, "1.0.0"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Version); +} + +#[test] +fn unrelated_url_declines() { + let url = "https://example.com/foo/-/foo-1.0.0.tgz"; + assert!(parse_bare_specifier(url, None, DEFAULT_TAG, REGISTRY).is_none()); +} + +#[test] +fn tarball_url_with_mismatched_filename_declines() { + // `/foo/-/bar-1.0.0.tgz` would be a registry-side bug + // (or a typo'd URL); the parser must not silently map it to a + // confused `(name, version)` pair just because the length math + // works out. Anchor on the scopeless-name prefix. + let url = "https://registry.npmjs.org/foo/-/bar-1.0.0.tgz"; + assert!(parse_bare_specifier(url, None, DEFAULT_TAG, REGISTRY).is_none()); +} + +#[test] +fn git_protocol_specifier_declines() { + assert!( + parse_bare_specifier( + "git+ssh://git@github.com/owner/repo", + Some("foo"), + DEFAULT_TAG, + REGISTRY, + ) + .is_none(), + ); +} + +#[test] +fn workspace_protocol_specifier_declines() { + assert!(parse_bare_specifier("workspace:*", Some("foo"), DEFAULT_TAG, REGISTRY).is_none()); +} + +#[test] +fn npm_prefix_without_alias_uses_bare_as_name_and_falls_back_to_default_tag() { + // No outer alias → enter the lastIndexOf('@') branch. `^1.0.0` has + // no `@`, so name = '^1.0.0' and bare = default tag ('latest'). + // Upstream's parser doesn't validate the name; downstream consumers + // surface the malformed name as ERR_PNPM_INVALID_PACKAGE_NAME from + // pick_package's validator. + let spec = parse_bare_specifier("npm:^1.0.0", None, DEFAULT_TAG, REGISTRY).unwrap(); + assert_eq!(spec.name, "^1.0.0"); + assert_eq!(spec.fetch_spec, "latest"); + assert_eq!(spec.spec_type, RegistryPackageSpecType::Tag); +} diff --git a/pacquet/crates/resolving-resolver-base/Cargo.toml b/pacquet/crates/resolving-resolver-base/Cargo.toml index 7ef830e290..e4f23ba205 100644 --- a/pacquet/crates/resolving-resolver-base/Cargo.toml +++ b/pacquet/crates/resolving-resolver-base/Cargo.toml @@ -11,8 +11,10 @@ license.workspace = true repository.workspace = true [dependencies] +pacquet-config = { workspace = true } pacquet-lockfile = { workspace = true } +chrono = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } diff --git a/pacquet/crates/resolving-resolver-base/src/resolve.rs b/pacquet/crates/resolving-resolver-base/src/resolve.rs index bbc4cf07a9..065b6214e6 100644 --- a/pacquet/crates/resolving-resolver-base/src/resolve.rs +++ b/pacquet/crates/resolving-resolver-base/src/resolve.rs @@ -10,6 +10,8 @@ use std::{collections::BTreeMap, future::Future, path::PathBuf, pin::Pin}; +use chrono::{DateTime, Utc}; +use pacquet_config::version_policy::PackageVersionPolicy; use pacquet_lockfile::{LockfileResolution, PkgNameVer}; use serde::{Deserialize, Serialize}; @@ -143,11 +145,6 @@ pub enum UpdateBehavior { /// Options the dispatcher hands a resolver per-resolve. Mirrors pnpm's /// [`ResolveOptions`](https://github.com/pnpm/pnpm/blob/3687b0e180/resolving/resolver-base/src/index.ts#L277-L302). -/// -/// Trust / published-at fields are not modeled yet — they belong to -/// the npm resolver's verifier surface, which already lives at -/// `resolving-npm-resolver`. They'll be added here when the -/// dispatcher's npm leg actually needs to pass them through. #[derive(Debug, Default, Clone)] pub struct ResolveOptions { pub project_dir: PathBuf, @@ -161,6 +158,18 @@ pub struct ResolveOptions { pub update: UpdateBehavior, pub inject_workspace_packages: bool, pub calc_specifier: bool, + /// `minimumReleaseAge` cutoff. Versions published after this point + /// are filtered out by the npm picker (or reported inline via + /// [`ResolveResult::policy_violation`] when no mature pick exists). + /// `None` disables the maturity filter. + pub published_by: Option>, + /// Per-package exclude policy for the maturity filter. `None` + /// applies the filter uniformly. + pub published_by_exclude: Option, + /// `true` suppresses on-disk and in-memory cache write-back during + /// resolution. Mirrors upstream's `dryRun` flag at the resolver + /// boundary. + pub dry_run: bool, } /// In-memory manifest shape a resolver may attach to its