From 84bb4b1a04ac72cfe39a60e4cfc0b03322b8344d Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Thu, 11 Jun 2026 19:39:45 +0200 Subject: [PATCH] perf: close the warm-resolve, symlink-churn, and download-concurrency gaps (#12329) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Motivation The [vlt.sh benchmarks](https://benchmarks.vlt.sh/) (2026-06-11 run, pacquet 0.11.3) show pacquet several times slower than the fastest package managers in the warm-metadata fresh-resolve cells (`cache`: 3.9–8.1x), the cold-cache frozen-install cells (`lockfile`: up to 10x on vue), and `clean`. Profiling the babylon and vue fixtures locally (macOS time profiles of the warm fresh resolve and the install tail) surfaced three independent causes, fixed here. ## Changes ### 1. Deprecation probing without manifest hydration (pacquet) With `minimumReleaseAge` active (the default), every range pick runs `filter_pkg_metadata_by_publish_date`, and any dist-tag pointing outside the maturity cutoff (`next`, `beta`, `canary`, a too-fresh `latest`) repopulates by scanning all candidates and reading each candidate's `deprecated` flag. Each read hydrated the full version manifest — a complete `serde_json` parse including the flattened catch-all map. On babylon's warm fresh resolve this was the single largest CPU consumer (~10 thread-seconds, all on the resolve task's critical path). `PackageVersions::is_deprecated` now answers from the raw fragment (substring pre-check, then a single-field deserialize with the same normalization as `PackageVersion::deprecated`), the tag-repopulation loop parses candidate versions once per filter call (mirroring the `parsedSemverCache` in pnpm's `filterPkgMetadataByPublishDate`), and the deprecated-pick fallback uses the probe instead of hydrating every version. **babylon warm fresh resolve: `resolve_workspace` 7.5s → 2.6s.** ### 2. Relative-symlink up-to-date check (pacquet) `force_symlink_dir` joined an existing link's relative contents onto the link parent and compared the result *verbatim* against the wanted target. Virtual-store links contain `..` segments (`../..//node_modules/`), so the joined path never compared equal and every up-to-date symlink was unlinked and recreated. Node's `path.relative` — which upstream `symlink-dir`'s `isExistingSymlinkUpToDate` builds on — resolves its arguments, so pnpm treats those links as current. Both sides now pass through `lexical_normalize`. The babylon install tail was dominated by exactly this unlink+symlink churn. **babylon warm install: 6.8s → 4.7s; warm frozen install: 4.2s → 2.3s.** ### 3. Default network concurrency floor 16 → 64 (pnpm + pacquet) The default was `min(64, max(workers * 3, 16))`. Downloads are I/O-bound, not CPU-bound: on a 4-vCPU CI runner the formula yields 16 concurrent requests, so a low-latency registry drains 600–1300-tarball installs 16 at a time while staying unsaturated — a large share of the cold-cell (`lockfile`/`clean`) gap on the benchmark runners. The default is now `min(96, max(workers * 3, 64))`; the `networkConcurrency` setting still overrides it. Applied to `@pnpm/installing.package-requester`, the lockfile-resolution verifier fan-out that mirrors its floor, and the same two spots in pacquet. Changeset included (minor). **This is a user-visible defaults change on both stacks — flagging it explicitly for review.** ## Local results (M-series macOS, vlt fixtures, isolated store/cache) | cell | before | after | |---|---|---| | vue `cache` | 1159 ms | **479 ms** | | vue `cache+lockfile` | 621 ms | **392 ms** | | vue no-op install | 48 ms | **41 ms** | | babylon `cache` | ~8.8 s | **4.75 s** | | babylon `cache+lockfile` | ~4.2 s | **2.27 s** | vue's warm cells are now ahead of every competitor measured locally; babylon's `cache` cell closed from ~2.5x behind the leader to ~1.35x (the remainder is the per-file store-integrity verify and per-file linking that the pnpm store contract requires). ## Validation - `cargo nextest`: registry, resolving-npm-resolver, resolving-deps-resolver, lockfile-verification, network, fs, tarball, package-manager, cli — 1300+ tests, all green; new unit tests cover the deprecation probe (string/bool/empty/corrupt shapes, nested-key false positives) and cross-parent relative-symlink reuse (fails without the fix). - Lockfile stability: `--lockfile-only` output is byte-identical before/after on vue; on babylon the resolved **package-version sets are identical across 6 runs (3 per binary)**. The babylon lockfile does flap between runs in the peer-suffix shape of `webpack-dev-server@5.2.2` (`(bufferutil@4.1.0)(utf-8-validate@5.0.10)` appearing/disappearing) — this is **pre-existing nondeterminism** reproducible with the unmodified binary against itself, in the optional-peer area; worth a separate issue. - Pre-push checks (fmt, taplo, `cargo doc -D warnings`, dylint) pass; eslint (root config) and `tsgo --build` pass for the two touched TS packages. --- .../raise-default-network-concurrency.md | 7 ++ .github/workflows/pacquet-ci.yml | 11 ++++ .github/workflows/test.yml | 9 +++ .../src/install/verifyLockfileResolutions.ts | 6 +- .../package-requester/src/packageRequester.ts | 6 +- pacquet/crates/fs/src/symlink_dir.rs | 12 ++-- pacquet/crates/fs/src/symlink_dir/tests.rs | 33 ++++++++++ .../src/verify_lockfile_resolutions.rs | 6 +- pacquet/crates/network/src/lib.rs | 12 ++-- pacquet/crates/network/src/tests.rs | 11 ++++ .../crates/registry/src/package_version.rs | 2 +- .../crates/registry/src/package_versions.rs | 41 +++++++++++- .../registry/src/package_versions/tests.rs | 57 ++++++++++++++++ .../src/pick_package_from_meta.rs | 60 ++++++++++------- .../src/pick_package_from_meta/tests.rs | 65 +++++++++++++++++++ 15 files changed, 298 insertions(+), 40 deletions(-) create mode 100644 .changeset/raise-default-network-concurrency.md diff --git a/.changeset/raise-default-network-concurrency.md b/.changeset/raise-default-network-concurrency.md new file mode 100644 index 0000000000..6c2eccc04d --- /dev/null +++ b/.changeset/raise-default-network-concurrency.md @@ -0,0 +1,7 @@ +--- +"@pnpm/installing.package-requester": minor +"@pnpm/installing.deps-installer": minor +"pnpm": minor +--- + +Raised the default network concurrency from `min(64, max(cpuCores * 3, 16))` to `min(96, max(cpuCores * 3, 64))`. Package downloads are I/O-bound, not CPU-bound, so deriving the floor from the core count left machines with few cores (for example 4-vCPU CI runners) downloading only 16 tarballs at a time and unable to saturate a low-latency registry. The `networkConcurrency` setting still overrides the default. diff --git a/.github/workflows/pacquet-ci.yml b/.github/workflows/pacquet-ci.yml index d31e52b0e7..afc41d5971 100644 --- a/.github/workflows/pacquet-ci.yml +++ b/.github/workflows/pacquet-ci.yml @@ -70,6 +70,17 @@ jobs: with: persist-credentials: false + # The clippy --all-targets pass plus the nextest build hold two + # full sets of debug artifacts in target/, which no longer fits + # in the hosted runner's free disk — the linker dies with ENOSPC + # / SIGBUS mid-build. Drop the preinstalled toolchains this job + # never uses (~25 GB) before compiling anything. + - name: Free up runner disk space + if: runner.os == 'Linux' + run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL + df -h / + - name: Install Rust Toolchain uses: ./.github/actions/rustup with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 85ed9654ca..5594aea42d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,6 +30,15 @@ jobs: runs-on: ${{ inputs.platform }} steps: + # A near-full "affected packages" sweep plus the pnpr Rust build + # can exhaust the hosted runner's free disk mid-test (the runner + # worker itself dies with "No space left on device"). Drop the + # preinstalled toolchains this job never uses (~25 GB) first. + - name: Free up runner disk space + if: ${{ runner.os == 'Linux' }} + run: | + sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache/CodeQL + df -h / - name: Configure Git run: | git config --global core.autocrlf false diff --git a/installing/deps-installer/src/install/verifyLockfileResolutions.ts b/installing/deps-installer/src/install/verifyLockfileResolutions.ts index c601211fcb..70f5297d53 100644 --- a/installing/deps-installer/src/install/verifyLockfileResolutions.ts +++ b/installing/deps-installer/src/install/verifyLockfileResolutions.ts @@ -27,10 +27,10 @@ export type { ResolutionPolicyViolation } // count is in the header and the remainder is summarized at the end. const MAX_VIOLATIONS_TO_PRINT = 20 -// 16 mirrors the floor of pnpm's package-requester network-concurrency -// (Math.min(64, Math.max(workers*3, 16))); keep them aligned so the +// 64 mirrors the floor of pnpm's package-requester network-concurrency +// (Math.min(96, Math.max(workers*3, 64))); keep them aligned so the // verification pass doesn't push past what the rest of the install respects. -const DEFAULT_CONCURRENCY = 16 +const DEFAULT_CONCURRENCY = 64 export const RESOLUTION_SHAPE_MISMATCH_VIOLATION_CODE = 'RESOLUTION_SHAPE_MISMATCH' diff --git a/installing/package-requester/src/packageRequester.ts b/installing/package-requester/src/packageRequester.ts index 93428c270c..2803ab63ef 100644 --- a/installing/package-requester/src/packageRequester.ts +++ b/installing/package-requester/src/packageRequester.ts @@ -94,7 +94,11 @@ export function createPackageRequester ( } { opts = opts || {} - const networkConcurrency = opts.networkConcurrency ?? Math.min(64, Math.max(calcMaxWorkers() * 3, 16)) + // Downloads are I/O-bound, not CPU-bound: a low-latency registry only + // saturates with enough requests in flight, so the floor matters more + // than the per-core scaling — a CPU-derived floor left 4-core CI + // runners draining multi-hundred-tarball installs 16 at a time. + const networkConcurrency = opts.networkConcurrency ?? Math.min(96, Math.max(calcMaxWorkers() * 3, 64)) const requestsQueue = new PQueue({ concurrency: networkConcurrency, }) diff --git a/pacquet/crates/fs/src/symlink_dir.rs b/pacquet/crates/fs/src/symlink_dir.rs index 5bb2660dae..15059aa881 100644 --- a/pacquet/crates/fs/src/symlink_dir.rs +++ b/pacquet/crates/fs/src/symlink_dir.rs @@ -269,16 +269,20 @@ fn force_symlink_inner( /// existing link's contents to an absolute path (using `link`'s /// parent dir when the contents are relative), then compare lexically /// against `wanted`. Single-level — does not follow chained symlinks. +/// +/// Both sides pass through [`fn@crate::lexical_normalize`] before comparing. +/// Node's `path.relative` resolves its arguments, so the `..` +/// segments in the relative link contents [`symlink_dir`] writes +/// collapse on the upstream side; without the same collapse here, +/// every up-to-date relative symlink reads as stale and pays an +/// unlink + recreate. fn existing_symlink_up_to_date(wanted: &Path, link: &Path, existing_link_string: &Path) -> bool { let existing_absolute = if existing_link_string.is_absolute() { existing_link_string.to_path_buf() } else { link.parent().unwrap_or_else(|| Path::new("")).join(existing_link_string) }; - // `path.relative(a, b) === ''` returns true iff lexically equal, - // so a direct PathBuf equality check is the right Rust analogue - // once both sides are absolute. - existing_absolute == wanted + crate::lexical_normalize(&existing_absolute) == crate::lexical_normalize(wanted) } /// Remove a regular file or directory that's occupying a symlink diff --git a/pacquet/crates/fs/src/symlink_dir/tests.rs b/pacquet/crates/fs/src/symlink_dir/tests.rs index b6a696d91b..9680c3a140 100644 --- a/pacquet/crates/fs/src/symlink_dir/tests.rs +++ b/pacquet/crates/fs/src/symlink_dir/tests.rs @@ -209,3 +209,36 @@ fn read_symlink_dir_reads_back_what_force_symlink_dir_wrote() { .expect("canonicalize read-back path"); assert_eq!(resolved_read, fs::canonicalize(&target).expect("canonicalize target")); } + +/// Same reuse contract when the link lives in a *different* parent +/// directory than the target, so the on-disk link contents contain +/// `..` segments (the virtual-store layout shape: +/// `.pnpm/a@1/node_modules/b` → `.pnpm/b@2/node_modules/b`). The +/// up-to-date check must collapse those segments like Node's +/// `path.relative` does; comparing the joined path verbatim reads +/// every such link as stale. +#[test] +fn force_symlink_dir_reuses_relative_link_across_parents() { + let root = tempdir().expect("create temp dir"); + let target = root.path().join("store").join("b@2").join("node_modules").join("b"); + let link = root.path().join("store").join("a@1").join("node_modules").join("b"); + fs::create_dir_all(&target).expect("create target dir"); + + let first = force_symlink_dir(&target, &link).expect("first call"); + assert_eq!(first, ForceSymlinkOutcome { reused: false, warning: None }); + #[cfg(unix)] + { + let contents = fs::read_link(&link).expect("read link"); + assert!( + contents.to_string_lossy().contains(".."), + "link contents should be relative with parent segments: {contents:?}", + ); + } + + let second = force_symlink_dir(&target, &link).expect("second call"); + assert_eq!( + second, + ForceSymlinkOutcome { reused: true, warning: None }, + "an up-to-date relative link must be reused, not rewritten", + ); +} diff --git a/pacquet/crates/lockfile-verification/src/verify_lockfile_resolutions.rs b/pacquet/crates/lockfile-verification/src/verify_lockfile_resolutions.rs index 42ec5361e7..f395f85a0e 100644 --- a/pacquet/crates/lockfile-verification/src/verify_lockfile_resolutions.rs +++ b/pacquet/crates/lockfile-verification/src/verify_lockfile_resolutions.rs @@ -35,9 +35,9 @@ use crate::{ }; /// Default concurrency cap for the per-candidate fan-out. Mirrors -/// upstream's `DEFAULT_CONCURRENCY = 16` (the floor of pnpm's +/// upstream's `DEFAULT_CONCURRENCY = 64` (the floor of pnpm's /// `package-requester` network-concurrency formula). -const DEFAULT_CONCURRENCY: usize = 16; +const DEFAULT_CONCURRENCY: usize = 64; /// Options bundle for [`verify_lockfile_resolutions`]. Mirrors /// upstream's @@ -45,7 +45,7 @@ const DEFAULT_CONCURRENCY: usize = 16; #[derive(Debug, Default, Clone)] pub struct VerifyLockfileResolutionsOptions<'a> { /// Cap on concurrent verifier futures. `None` falls back to - /// the internal `DEFAULT_CONCURRENCY` (`16`, matching upstream). + /// the internal `DEFAULT_CONCURRENCY` (`64`, matching upstream). pub concurrency: Option, /// Absolute path of the lockfile being verified. Required for /// the on-disk verification cache (the stat shortcut + per-path diff --git a/pacquet/crates/network/src/lib.rs b/pacquet/crates/network/src/lib.rs index fde7cd0304..e00c9b581d 100644 --- a/pacquet/crates/network/src/lib.rs +++ b/pacquet/crates/network/src/lib.rs @@ -591,12 +591,16 @@ fn build_scheme_proxy( /// [`worker/src/index.ts`](https://github.com/pnpm/pnpm/blob/1819226b51/worker/src/index.ts#L63-L72): /// /// ```ts -/// networkConcurrency = Math.min(64, Math.max(calcMaxWorkers() * 3, 16)) +/// networkConcurrency = Math.min(96, Math.max(calcMaxWorkers() * 3, 64)) /// // calcMaxWorkers() = Math.max(1, availableParallelism() - 1) /// ``` /// -/// Concretely: 16 on a 4-core machine, 21 on 8-core, 27 on 10-core, -/// 45 on 16-core, capped at 64. +/// Concretely: 64 up to a 22-core machine, scaling with cores beyond +/// that, capped at 96. The floor matters more than the scaling: +/// downloads are I/O-bound, not CPU-bound, and a low-latency registry +/// only saturates when enough requests are in flight — a CPU-derived +/// floor left 4-core CI runners draining 600-tarball installs 16 at a +/// time, several times slower than the same network could serve. /// /// Uses [`std::thread::available_parallelism`] rather than /// `num_cpus::get()` so cgroup / CPU-quota limits in containers and @@ -608,7 +612,7 @@ fn build_scheme_proxy( pub fn default_network_concurrency() -> usize { let available_parallelism = std::thread::available_parallelism().map_or(1, NonZeroUsize::get); let max_workers = available_parallelism.saturating_sub(1).max(1); - max_workers.saturating_mul(3).clamp(16, 64) + max_workers.saturating_mul(3).clamp(64, 96) } /// This is only necessary for tests. diff --git a/pacquet/crates/network/src/tests.rs b/pacquet/crates/network/src/tests.rs index 8d243835ce..16b553d730 100644 --- a/pacquet/crates/network/src/tests.rs +++ b/pacquet/crates/network/src/tests.rs @@ -678,3 +678,14 @@ fn for_installs_falls_back_on_unencodable_user_agent() { ) .expect("unencodable user-agent falls back to default"); } + +/// Pins the floor and cap of the default request-concurrency formula. +/// The floor exists because downloads are I/O-bound: deriving it from +/// the core count left low-core CI runners draining multi-hundred- +/// tarball installs 16 requests at a time without saturating a +/// low-latency registry. +#[test] +fn default_network_concurrency_stays_within_floor_and_cap() { + let concurrency = super::default_network_concurrency(); + assert!((64..=96).contains(&concurrency), "got {concurrency}"); +} diff --git a/pacquet/crates/registry/src/package_version.rs b/pacquet/crates/registry/src/package_version.rs index bb46cc3fe8..cdce3918e2 100644 --- a/pacquet/crates/registry/src/package_version.rs +++ b/pacquet/crates/registry/src/package_version.rs @@ -151,7 +151,7 @@ where /// A bool `true` becomes `Some("")`, a bool `false` becomes `None`; /// a string stays as `Some(s)`. Missing field defaults to `None` via /// the `#[serde(default)]` on the field itself. -fn deserialize_deprecated_field<'de, Deser>( +pub(crate) fn deserialize_deprecated_field<'de, Deser>( deserializer: Deser, ) -> Result, Deser::Error> where diff --git a/pacquet/crates/registry/src/package_versions.rs b/pacquet/crates/registry/src/package_versions.rs index 04c93799c4..3265435c54 100644 --- a/pacquet/crates/registry/src/package_versions.rs +++ b/pacquet/crates/registry/src/package_versions.rs @@ -24,7 +24,16 @@ use std::{ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::value::RawValue; -use crate::package_version::PackageVersion; +use crate::package_version::{PackageVersion, deserialize_deprecated_field}; + +/// Single-field view of a version manifest for +/// [`PackageVersions::is_deprecated`] — same normalization as +/// [`PackageVersion::deprecated`], every other field skipped. +#[derive(Deserialize)] +struct DeprecatedProbe { + #[serde(default, deserialize_with = "deserialize_deprecated_field")] + deprecated: Option, +} #[derive(Debug, Default, Clone)] pub struct PackageVersions { @@ -141,6 +150,36 @@ impl PackageVersions { self.slots.contains_key(version) } + /// Whether `version` is marked deprecated, equivalent to + /// `get(version).is_some_and(|manifest| manifest.deprecated.is_some())` + /// but without hydrating the full manifest: an unhydrated fragment + /// is probed with a single-field deserialize (skipped entirely when + /// the fragment text doesn't contain the `"deprecated"` key). + /// An absent version or an undecodable fragment reads as not + /// deprecated, matching the "undecodable fragment behaves as if + /// the version were absent" contract of [`PackageVersions::get`]. + /// + /// The pick paths consult deprecation for *many* candidate + /// versions per packument (dist-tag repopulation, the + /// deprecated-pick fallback); hydrating each candidate parses its + /// whole manifest — including the flattened catch-all map — and + /// dominated warm-resolve CPU. + #[must_use] + pub fn is_deprecated(&self, version: &str) -> bool { + let Some(slot) = self.slots.get(version) else { return false }; + if let Some(parsed) = slot.parsed.get() { + return parsed.as_ref().is_some_and(|manifest| manifest.deprecated.is_some()); + } + let Some(json) = slot.source.json() else { return false }; + // Absence of the key text anywhere in the fragment proves the + // field is absent; presence (possibly in an unrelated nested + // position) falls through to the real parse. + if !json.contains(r#""deprecated""#) { + return false; + } + serde_json::from_str::(&json).is_ok_and(|probe| probe.deprecated.is_some()) + } + /// Version strings, in `HashMap` order. Never hydrates. pub fn keys(&self) -> impl Iterator { self.slots.keys() diff --git a/pacquet/crates/registry/src/package_versions/tests.rs b/pacquet/crates/registry/src/package_versions/tests.rs index d1108cd602..d717f293d6 100644 --- a/pacquet/crates/registry/src/package_versions/tests.rs +++ b/pacquet/crates/registry/src/package_versions/tests.rs @@ -126,3 +126,60 @@ fn latest_returns_none_for_dangling_or_undecodable_tag() { parse_package(r#"{"name": "foo", "dist-tags": {"latest": "9.9.9"}, "versions": {}}"#); assert!(dangling.latest().is_none()); } + +#[test] +fn is_deprecated_probes_without_hydrating() { + let package = parse_package( + r#"{ + "name": "foo", + "dist-tags": {}, + "versions": { + "1.0.0": {"name": "foo", "version": "1.0.0", "dist": {"integrity": "sha512-a", "tarball": "https://r/foo-1.0.0.tgz"}}, + "1.1.0": {"name": "foo", "version": "1.1.0", "deprecated": "use 2.x", "dist": {"integrity": "sha512-b", "tarball": "https://r/foo-1.1.0.tgz"}}, + "1.2.0": {"name": "foo", "version": "1.2.0", "deprecated": false, "dist": {"integrity": "sha512-c", "tarball": "https://r/foo-1.2.0.tgz"}}, + "1.3.0": {"name": "foo", "version": "1.3.0", "deprecated": true, "dist": {"integrity": "sha512-d", "tarball": "https://r/foo-1.3.0.tgz"}}, + "1.4.0": {"name": "foo", "version": "1.4.0", "deprecated": "", "dist": {"integrity": "sha512-e", "tarball": "https://r/foo-1.4.0.tgz"}}, + "1.9.0": {"corrupt": "fragment", "deprecated": 1} + } + }"#, + ); + + assert!(!package.versions.is_deprecated("1.0.0")); + assert!(package.versions.is_deprecated("1.1.0")); + assert!(!package.versions.is_deprecated("1.2.0")); + assert!(package.versions.is_deprecated("1.3.0")); + assert!(package.versions.is_deprecated("1.4.0")); + // Absent version and undecodable-probe fragments read as not + // deprecated, matching the absent-version contract of `get`. + assert!(!package.versions.is_deprecated("9.9.9")); + assert!(!package.versions.is_deprecated("1.9.0")); + + // The probe must agree with the hydrated field on every slot it + // can hydrate, and must not have hydrated anything itself: `get` + // still parses fresh (no cached Arc identity from the probe). + for version in ["1.0.0", "1.1.0", "1.2.0", "1.3.0", "1.4.0"] { + let manifest = package.versions.get(version).expect("hydrate"); + assert_eq!( + package.versions.is_deprecated(version), + manifest.deprecated.is_some(), + "probe vs hydrated disagree for {version}", + ); + } +} + +/// A fragment that mentions `"deprecated"` only in an unrelated nested +/// position (a dependency literally named `deprecated`) must fall +/// through to the real parse and report not-deprecated. +#[test] +fn is_deprecated_ignores_unrelated_key_text() { + let package = parse_package( + r#"{ + "name": "foo", + "dist-tags": {}, + "versions": { + "1.0.0": {"name": "foo", "version": "1.0.0", "dependencies": {"deprecated": "^0.0.2"}, "dist": {"integrity": "sha512-a", "tarball": "https://r/foo-1.0.0.tgz"}} + } + }"#, + ); + assert!(!package.versions.is_deprecated("1.0.0")); +} diff --git a/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta.rs b/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta.rs index 80dc428bdc..4fafbf2011 100644 --- a/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta.rs +++ b/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta.rs @@ -27,6 +27,7 @@ //! module pulls in no I/O. use std::{ + cell::OnceCell, collections::BTreeMap, sync::{Arc, LazyLock}, }; @@ -345,15 +346,12 @@ pub fn pick_version_by_version_range( // non-deprecated subset. Matches upstream's loop at // pickPackageFromMeta.ts#L194-L201. if let Some(ref picked) = max_pick { - let picked_meta = opts.meta.versions.get(picked); - let picked_is_deprecated = picked_meta.is_some_and(|version| version.deprecated.is_some()); + let picked_is_deprecated = opts.meta.versions.is_deprecated(picked); if picked_is_deprecated && all_versions.len() > 1 { - let non_deprecated: Vec<&str> = opts - .meta - .versions + let non_deprecated: Vec<&str> = all_versions .iter() - .filter(|(_, manifest)| manifest.deprecated.is_none()) - .map(|(name, _)| name.as_str()) + .copied() + .filter(|version| !opts.meta.versions.is_deprecated(version)) .collect(); if let Some(non_deprecated_max) = max_satisfying(&non_deprecated, opts.version_range) { return Some(non_deprecated_max); @@ -431,6 +429,14 @@ pub fn filter_pkg_metadata_by_publish_date( }); let mut dist_tags_within_date = std::collections::HashMap::new(); + // Candidate versions parsed once per filter call and shared by + // every repopulated tag, with the deprecation flag resolved + // lazily per candidate — mirrors upstream's `parsedSemverCache`. + // Deprecation goes through [`PackageVersions::is_deprecated`] + // instead of hydrating each candidate's manifest; the hydration + // per comparison dominated warm-resolve CPU on packuments with + // out-of-cutoff dist-tags. + let mut parsed_candidates: Option)>> = None; for (tag, version) in &meta.dist_tags { if versions_within_date.contains_key(version) { dist_tags_within_date.insert(tag.clone(), version.clone()); @@ -438,35 +444,43 @@ pub fn filter_pkg_metadata_by_publish_date( } let Ok(original) = Version::parse(version) else { continue }; let original_is_prerelease = !original.pre_release.is_empty(); - let mut best_version: Option<(Version, &String)> = None; - for candidate_raw in versions_within_date.keys() { - let Ok(candidate) = Version::parse(candidate_raw) else { continue }; + let candidates = parsed_candidates.get_or_insert_with(|| { + versions_within_date + .keys() + .filter_map(|raw| { + Version::parse(raw).ok().map(|parsed| (parsed, raw, OnceCell::new())) + }) + .collect() + }); + let deprecated = |slot: &(Version, &String, OnceCell)| -> bool { + *slot.2.get_or_init(|| versions_within_date.is_deprecated(slot.1)) + }; + let mut best_index: Option = None; + for (index, slot) in candidates.iter().enumerate() { + let (candidate, _, _) = slot; if tag != "latest" && candidate.major != original.major { continue; } if candidate.pre_release.is_empty() == original_is_prerelease { continue; } - match best_version { - None => best_version = Some((candidate, candidate_raw)), - Some((ref best, best_raw)) => { - let best_deprecated = versions_within_date - .get(best_raw) - .is_some_and(|manifest| manifest.deprecated.is_some()); - let candidate_deprecated = versions_within_date - .get(candidate_raw) - .is_some_and(|manifest| manifest.deprecated.is_some()); - let candidate_wins = (candidate > *best + match best_index { + None => best_index = Some(index), + Some(best) => { + let best_slot = &candidates[best]; + let best_deprecated = deprecated(best_slot); + let candidate_deprecated = deprecated(slot); + let candidate_wins = (*candidate > best_slot.0 && best_deprecated == candidate_deprecated) || (best_deprecated && !candidate_deprecated); if candidate_wins { - best_version = Some((candidate, candidate_raw)); + best_index = Some(index); } } } } - if let Some((_, best_raw)) = best_version { - dist_tags_within_date.insert(tag.clone(), best_raw.clone()); + if let Some(best) = best_index { + dist_tags_within_date.insert(tag.clone(), candidates[best].1.clone()); } } diff --git a/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta/tests.rs b/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta/tests.rs index 7da94d6d4b..bfeb09826b 100644 --- a/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta/tests.rs +++ b/pacquet/crates/resolving-npm-resolver/src/pick_package_from_meta/tests.rs @@ -602,3 +602,68 @@ fn pick_from_meta_returns_none_for_undecodable_exact_version() { .expect("ok"); assert!(picked.is_none()); } + +/// The dist-tag repopulation tie-break prefers a non-deprecated +/// candidate over a higher deprecated one (and only falls back to +/// deprecated candidates when nothing else matches the tag family). +#[test] +fn filter_tag_rewrite_prefers_non_deprecated_candidate() { + let mut pkg = make_package( + "acme", + &[ + ("2.1.0", None), + ("2.2.0", Some("use 3.x")), + ("2.5.0", None), // dropped by the cutoff + ], + &[("old", "2.5.0")], + ); + pkg.time = Some(make_time_map(&[ + ("2.1.0", "2024-01-01T00:00:00.000Z"), + ("2.2.0", "2024-06-01T00:00:00.000Z"), + ("2.5.0", "2025-06-01T00:00:00.000Z"), + ])); + let cutoff = parse_iso("2025-01-01T00:00:00.000Z"); + let filtered = filter_pkg_metadata_by_publish_date(&pkg, cutoff, None); + assert_eq!( + filtered.dist_tag("old"), + Some("2.1.0"), + "non-deprecated 2.1.0 must beat the higher deprecated 2.2.0", + ); +} + +/// The repopulation tie-break must read deprecation from the raw +/// fragments without full-manifest hydration. The fragments here are +/// valid JSON but not decodable as complete version manifests, so a +/// tie-break that hydrates to check `deprecated` sees every candidate +/// as undecodable (= not deprecated) and picks the higher deprecated +/// version instead. Guards the no-hydration deprecation probe the +/// publish-date filter relies on — also matching pnpm, which reads +/// `versions[candidate].deprecated` off plain parsed JSON without +/// validating the rest of the entry. +#[test] +fn filter_tag_rewrite_reads_deprecation_from_raw_fragments() { + let mut pkg: Package = serde_json::from_str( + r#"{ + "name": "acme", + "dist-tags": {"old": "2.5.0"}, + "versions": { + "2.1.0": {"name": "acme"}, + "2.2.0": {"name": "acme", "deprecated": "use 3.x"}, + "2.5.0": {"name": "acme"} + } + }"#, + ) + .expect("parse package"); + pkg.time = Some(make_time_map(&[ + ("2.1.0", "2024-01-01T00:00:00.000Z"), + ("2.2.0", "2024-06-01T00:00:00.000Z"), + ("2.5.0", "2025-06-01T00:00:00.000Z"), + ])); + let cutoff = parse_iso("2025-01-01T00:00:00.000Z"); + let filtered = filter_pkg_metadata_by_publish_date(&pkg, cutoff, None); + assert_eq!( + filtered.dist_tag("old"), + Some("2.1.0"), + "deprecation must be read from the raw fragment, not via hydration", + ); +}