From 43b5bf75202b41684ee17d3faac03bbcdf7b1ebd Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Fri, 12 Jun 2026 17:27:13 +0200 Subject: [PATCH] perf(pacquet): cache build metadata during installs (#12360) * perf(pacquet): cache build metadata during installs * fix(pacquet): satisfy clippy for build helpers * fix(pacquet): reduce prefetch row type complexity --- Cargo.lock | 1 + .../package-manager/src/build_modules.rs | 33 ++++---- .../src/build_modules/tests.rs | 64 ++++++++++++++- .../src/create_virtual_store.rs | 49 ++++++++++-- .../src/create_virtual_store/tests.rs | 16 +++- .../src/install_frozen_lockfile.rs | 2 + .../src/install_with_fresh_lockfile.rs | 2 + pacquet/crates/package-manifest/src/lib.rs | 26 +++++++ pacquet/crates/tarball/Cargo.toml | 11 +-- pacquet/crates/tarball/src/lib.rs | 43 +++++++++-- pacquet/crates/tarball/src/tests.rs | 77 +++++++++++++++++++ 11 files changed, 286 insertions(+), 38 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0e48da5c16..db81082a29 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4366,6 +4366,7 @@ dependencies = [ "pacquet-diagnostics", "pacquet-fs", "pacquet-network", + "pacquet-package-manifest", "pacquet-reporter", "pacquet-store-dir", "pipe-trait", diff --git a/pacquet/crates/package-manager/src/build_modules.rs b/pacquet/crates/package-manager/src/build_modules.rs index f6dd7ad638..61a4e619b5 100644 --- a/pacquet/crates/package-manager/src/build_modules.rs +++ b/pacquet/crates/package-manager/src/build_modules.rs @@ -300,6 +300,10 @@ pub struct BuildModules<'a> { /// disabled or no rows were prefetched; the gate falls through /// to "rebuild" for every snapshot. pub side_effects_maps_by_snapshot: Option<&'a crate::SideEffectsMapsBySnapshot>, + /// Per-snapshot `requiresBuild` values from the warm-cache + /// prefetch. Missing entries fall back to inspecting the + /// materialized package directory. + pub requires_build_by_snapshot: Option<&'a crate::RequiresBuildBySnapshot>, /// `;;node` — the prefix part of /// upstream's dep-state cache key. Computed once at install /// start by [`pacquet_graph_hasher::detect_node_major`] + @@ -421,6 +425,7 @@ impl BuildModules<'_> { importers, allow_build_policy, side_effects_maps_by_snapshot, + requires_build_by_snapshot, engine_name, side_effects_cache, side_effects_cache_write, @@ -440,14 +445,9 @@ impl BuildModules<'_> { let extra_env = HashMap::new(); - // Compute requires_build per snapshot from each extracted package - // directory. Mirrors upstream where the worker computes - // `node.requiresBuild` from the package's manifest scripts and the - // presence of `binding.gyp` / `.hooks/` after extraction - // (`https://github.com/pnpm/pnpm/blob/80037699fb/building/pkg-requires-build/src/index.ts`). - // Pacquet does this here rather than in a worker because the worker - // does not exist yet — it is the same per-package on-disk inspection, - // moved to the build entry point. + // Compute `requiresBuild` per snapshot. Warm store-index rows + // already carry the upstream worker's answer, so only misses + // need to inspect the materialized package directory. let requires_build_map: HashMap = snapshots .keys() // Skip snapshots that never landed on disk. `pkg_requires_build` @@ -457,14 +457,15 @@ impl BuildModules<'_> { // optional fan-out. .filter(|key| !skipped.contains(key)) .map(|key| { - // Hoisted snapshots without a recorded `pkgRoot` (the - // walker dropped them) get `requires_build = false` - // so they fall through both the script-runner and the - // patch-apply gates without a syscall, matching the - // isolated path's `pkg_dir.exists() == false` skip. - let requires = pkg_root_for_key(layout, pkg_root_by_key, key) - .as_deref() - .is_some_and(pkg_requires_build); + let pkg_root = pkg_root_for_key(layout, pkg_root_by_key, key); + let requires = match ( + pkg_root.as_deref(), + requires_build_by_snapshot.and_then(|map| map.get(key).copied()), + ) { + (None, _) => false, + (_, Some(requires)) => requires, + (Some(pkg_root), None) => pkg_requires_build(pkg_root), + }; (key.clone(), requires) }) .collect(); diff --git a/pacquet/crates/package-manager/src/build_modules/tests.rs b/pacquet/crates/package-manager/src/build_modules/tests.rs index f7caf27d5a..eb88665d65 100644 --- a/pacquet/crates/package-manager/src/build_modules/tests.rs +++ b/pacquet/crates/package-manager/src/build_modules/tests.rs @@ -1,5 +1,5 @@ use super::{AllowBuildPolicy, BuildModules, parse_name_version_from_key}; -use crate::{SkippedSnapshots, VirtualStoreLayout}; +use crate::{RequiresBuildBySnapshot, SkippedSnapshots, VirtualStoreLayout}; use pacquet_config::Config; use pacquet_executor::ScriptsPrependNodePath; use pacquet_lockfile::{ @@ -332,6 +332,7 @@ fn build_modules_collects_ignored_builds() { packages: None, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: true, side_effects_cache_write: false, @@ -358,6 +359,54 @@ fn build_modules_collects_ignored_builds() { ); } +#[test] +fn cached_requires_build_false_skips_package_dir_probe() { + let pkg_key = key("aaa", "1.0.0"); + let snapshots = HashMap::from([(pkg_key.clone(), SnapshotEntry::default())]); + let importers = root_importers(&[("aaa", "1.0.0")]); + let policy = AllowBuildPolicy::default(); + + let virtual_store_dir = tempdir().expect("create temp dir"); + let modules_dir = tempdir().expect("create temp dir"); + let lockfile_dir = tempdir().expect("create temp dir"); + + create_buildable_pkg(virtual_store_dir.path(), &pkg_key); + let requires_build_by_snapshot = RequiresBuildBySnapshot::from([(pkg_key, false)]); + + let ignored = BuildModules { + layout: &VirtualStoreLayout::legacy( + virtual_store_dir.path(), + pacquet_config::default_virtual_store_dir_max_length() as usize, + ), + modules_dir: modules_dir.path(), + lockfile_dir: lockfile_dir.path(), + snapshots: Some(&snapshots), + importers: &importers, + packages: None, + allow_build_policy: &policy, + side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: Some(&requires_build_by_snapshot), + engine_name: None, + side_effects_cache: true, + side_effects_cache_write: false, + store_dir: None, + store_index_writer: None, + patches: None, + + scripts_prepend_node_path: ScriptsPrependNodePath::Never, + unsafe_perm: true, + child_concurrency: 1, + skipped: &SkippedSnapshots::default(), + pkg_root_by_key: None, + gather_ancestor_bin_paths: false, + frozen_store: false, + } + .run::() + .expect("run BuildModules"); + + assert!(ignored.is_empty()); +} + /// Parallel-path variant of [`build_modules_collects_ignored_builds`] /// running under `child_concurrency: 2` so the rayon /// `chunk.par_iter().try_for_each(...)` dispatch is actually @@ -405,6 +454,7 @@ fn build_modules_collects_ignored_builds_under_concurrency() { packages: None, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: true, side_effects_cache_write: false, @@ -466,6 +516,7 @@ fn build_modules_excludes_explicit_deny_from_ignored() { packages: None, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: true, side_effects_cache_write: false, @@ -550,6 +601,7 @@ fn do_not_fail_on_optional_dep_with_failing_postinstall() { packages: None, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: true, side_effects_cache_write: false, @@ -689,6 +741,7 @@ fn using_side_effects_cache_skips_rebuild() { importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: Some(&side_effects_maps), + requires_build_by_snapshot: None, engine_name: Some(engine), side_effects_cache: true, side_effects_cache_write: false, @@ -757,6 +810,7 @@ fn side_effects_cache_disabled_bypasses_the_gate() { importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: Some(&side_effects_maps), + requires_build_by_snapshot: None, engine_name: Some("darwin;arm64;node20"), side_effects_cache: false, side_effects_cache_write: false, @@ -818,6 +872,7 @@ fn fail_when_failing_postinstall_is_required() { packages: None, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: true, side_effects_cache_write: false, @@ -901,6 +956,7 @@ fn frozen_backstop_run( importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: false, side_effects_cache_write: false, @@ -1219,6 +1275,7 @@ async fn write_path_populates_side_effects_row() { importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: Some(engine), side_effects_cache: true, side_effects_cache_write: true, @@ -1334,6 +1391,7 @@ async fn write_path_disabled_skips_upload() { importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: Some("darwin;arm64;node20"), side_effects_cache: true, side_effects_cache_write: false, @@ -1457,6 +1515,7 @@ async fn upload_error_does_not_interrupt_install() { importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: Some("darwin;arm64;node20"), side_effects_cache: true, side_effects_cache_write: true, @@ -1691,6 +1750,7 @@ new file mode 100644 importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: Some(engine), side_effects_cache: true, side_effects_cache_write: true, @@ -1801,6 +1861,7 @@ new file mode 100644 importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: false, side_effects_cache_write: false, @@ -1882,6 +1943,7 @@ async fn missing_patch_file_path_errors_with_diagnostic() { importers: &importers, allow_build_policy: &policy, side_effects_maps_by_snapshot: None, + requires_build_by_snapshot: None, engine_name: None, side_effects_cache: false, side_effects_cache_write: false, diff --git a/pacquet/crates/package-manager/src/create_virtual_store.rs b/pacquet/crates/package-manager/src/create_virtual_store.rs index 1b4368e678..131cdf22af 100644 --- a/pacquet/crates/package-manager/src/create_virtual_store.rs +++ b/pacquet/crates/package-manager/src/create_virtual_store.rs @@ -12,6 +12,7 @@ use pacquet_lockfile::{ SnapshotEntry, select_platform_variant, }; use pacquet_network::ThrottledClient; +use pacquet_package_manifest::{files_include_install_scripts, manifest_requires_build}; use pacquet_reporter::{ BrokenModulesLog, LogEvent, LogLevel, ProgressLog, ProgressMessage, Reporter, StatsLog, StatsMessage, @@ -24,6 +25,7 @@ use pacquet_tarball::{MemCache, PrefetchResult, SharedReportedProgressKeys, pref use pipe_trait::Pipe; use std::{ collections::{HashMap, HashSet}, + fs, path::{Path, PathBuf}, sync::atomic::AtomicU8, }; @@ -65,6 +67,11 @@ pub type PackageManifests = HashMap>>>; +/// Per-snapshot `requiresBuild` flags recovered from the store index +/// during the warm-cache prefetch. `BuildModules` consumes this to +/// avoid re-inspecting every package directory after materialization. +pub type RequiresBuildBySnapshot = HashMap; + /// Output of [`CreateVirtualStore::run`]. Bundles the bin-link /// manifest cache, the per-snapshot side-effects-cache overlays the /// build-phase needs, and the per-install fetch-failure set. @@ -80,6 +87,7 @@ pub type SideEffectsMapsBySnapshot = pub struct CreateVirtualStoreOutput { pub package_manifests: PackageManifests, pub side_effects_maps_by_snapshot: SideEffectsMapsBySnapshot, + pub requires_build_by_snapshot: RequiresBuildBySnapshot, pub fetch_failed: HashSet, /// Per-package CAS index, populated only when /// [`CreateVirtualStore::node_linker`] is @@ -233,6 +241,7 @@ impl CreateVirtualStore<'_> { return Ok(CreateVirtualStoreOutput { package_manifests: PackageManifests::new(), side_effects_maps_by_snapshot: SideEffectsMapsBySnapshot::new(), + requires_build_by_snapshot: RequiresBuildBySnapshot::new(), fetch_failed: HashSet::new(), cas_paths_by_pkg_id: is_hoisted.then(CasPathsByPkgId::new), }); @@ -540,6 +549,7 @@ impl CreateVirtualStore<'_> { cas_paths: prefetched, manifests: prefetched_manifests, side_effects_maps: prefetched_side_effects, + requires_build: prefetched_requires_build, } = prefetch_cas_paths( store_index.clone(), store_dir, @@ -595,6 +605,8 @@ impl CreateVirtualStore<'_> { HashMap::with_capacity(prefetched_manifests.len()); let mut side_effects_maps_by_snapshot: SideEffectsMapsBySnapshot = HashMap::with_capacity(prefetched_side_effects.len()); + let mut requires_build_by_snapshot: RequiresBuildBySnapshot = + HashMap::with_capacity(prefetched_requires_build.len()); // First pass: process *skipped* snapshots into the bin- // manifest cache and the side-effects map. They don't enter @@ -617,6 +629,11 @@ impl CreateVirtualStore<'_> { side_effects_maps_by_snapshot .insert((*snapshot_key).clone(), std::sync::Arc::clone(maps)); } + if let Some(cache_key) = cache_key.as_deref() + && let Some(&requires_build) = prefetched_requires_build.get(cache_key) + { + requires_build_by_snapshot.insert((*snapshot_key).clone(), requires_build); + } } // Second pass: survivors. Same loop as above plus the @@ -638,6 +655,11 @@ impl CreateVirtualStore<'_> { side_effects_maps_by_snapshot .insert((*snapshot_key).clone(), std::sync::Arc::clone(maps)); } + if let Some(cache_key) = cache_key.as_deref() + && let Some(&requires_build) = prefetched_requires_build.get(cache_key) + { + requires_build_by_snapshot.insert((*snapshot_key).clone(), requires_build); + } // Carry the cache key alongside the warm entry so the // reporter can skip a duplicate package-status event when // a resolve-time prefetch already emitted it. @@ -745,14 +767,14 @@ impl CreateVirtualStore<'_> { // future returns; under hoisted no slot was written and the // CAS index is the only output. let mut fetch_failed: HashSet = HashSet::new(); - let mut cold_cas_paths: Vec<(&PackageKey, &SnapshotEntry, HashMap)> = + let mut cold_cas_paths: Vec<(&PackageKey, &SnapshotEntry, HashMap, bool)> = Vec::new(); if !cold.is_empty() { let prefetched_ref = Some(&prefetched); let verified_files_cache_ref = &verified_files_cache; type ColdOutcome<'a> = ( Option, - Option<(&'a PackageKey, &'a SnapshotEntry, HashMap)>, + Option<(&'a PackageKey, &'a SnapshotEntry, HashMap, bool)>, ); let outcomes: Vec> = cold .iter() @@ -793,7 +815,10 @@ impl CreateVirtualStore<'_> { .run::() .await; match result { - Ok(cas_paths) => Ok((None, Some((*snapshot_key, *snapshot, cas_paths)))), + Ok(cas_paths) => { + let requires_build = requires_build_from_cas_paths(&cas_paths); + Ok((None, Some((*snapshot_key, *snapshot, cas_paths, requires_build)))) + } Err(err) if snapshot.optional && is_fetch_side_failure(&err) => { // Silent swallow, matching upstream. `tracing::warn!` // gives operator visibility without polluting @@ -832,6 +857,7 @@ impl CreateVirtualStore<'_> { fetch_failed.insert(key); } if let Some(captured) = captured { + requires_build_by_snapshot.insert((*captured.0).clone(), captured.3); cold_cas_paths.push(captured); } } @@ -849,7 +875,7 @@ impl CreateVirtualStore<'_> { if !is_hoisted && !cold_cas_paths.is_empty() { let cold_slots: Vec> = cold_cas_paths .iter() - .map(|(snapshot_key, snapshot, cas_paths)| SlotLink { + .map(|(snapshot_key, snapshot, cas_paths, _requires_build)| SlotLink { snapshot_key, snapshot, cas_paths, @@ -901,7 +927,7 @@ impl CreateVirtualStore<'_> { // real install. if let Some(map) = cas_paths_by_pkg_id.as_mut() { map.reserve(cold_cas_paths.len()); - for (snapshot_key, _snapshot, paths) in cold_cas_paths { + for (snapshot_key, _snapshot, paths, _requires_build) in cold_cas_paths { // Mirrors upstream's `getPkgIdWithPatchHash` — strip // the peer-graph suffix but keep `(patch_hash=...)` so // patched packages share one CAS-paths entry across @@ -922,12 +948,25 @@ impl CreateVirtualStore<'_> { Ok(CreateVirtualStoreOutput { package_manifests, side_effects_maps_by_snapshot, + requires_build_by_snapshot, fetch_failed, cas_paths_by_pkg_id, }) } } +fn requires_build_from_cas_paths(cas_paths: &HashMap) -> bool { + if files_include_install_scripts(cas_paths.keys()) { + return true; + } + let Some(package_json) = cas_paths.get("package.json") else { return false }; + let Ok(contents) = fs::read_to_string(package_json) else { return false }; + let Ok(manifest) = serde_json::from_str::(&contents) else { + return false; + }; + manifest_requires_build(&manifest) +} + struct SlotLink<'a> { snapshot_key: &'a PackageKey, snapshot: &'a SnapshotEntry, diff --git a/pacquet/crates/package-manager/src/create_virtual_store/tests.rs b/pacquet/crates/package-manager/src/create_virtual_store/tests.rs index f43adc1940..10143291db 100644 --- a/pacquet/crates/package-manager/src/create_virtual_store/tests.rs +++ b/pacquet/crates/package-manager/src/create_virtual_store/tests.rs @@ -83,8 +83,14 @@ async fn cold_batch_links_slots_in_parallel() { let source_dir = workspace_root.join("prefetched").join(package_name); fs::create_dir_all(&source_dir).expect("create prefetched package dir"); let manifest_path = source_dir.join("package.json"); - fs::write(&manifest_path, format!(r#"{{"name":"{package_name}","version":"1.0.0"}}"#)) - .expect("write package manifest"); + let manifest = if package_name == "cold-a" { + format!( + r#"{{"name":"{package_name}","version":"1.0.0","scripts":{{"postinstall":"node build.js"}}}}"#, + ) + } else { + format!(r#"{{"name":"{package_name}","version":"1.0.0"}}"#) + }; + fs::write(&manifest_path, manifest).expect("write package manifest"); let index_path = source_dir.join("index.js"); fs::write(&index_path, "module.exports = true\n").expect("write package body"); @@ -117,7 +123,7 @@ async fn cold_batch_links_slots_in_parallel() { let probe = crate::create_virtual_dir_by_snapshot::tests::LinkConcurrencyProbe::waiting_for_overlap(); - CreateVirtualStore { + let output = CreateVirtualStore { http_client: &pacquet_network::ThrottledClient::default(), config, packages: Some(&packages), @@ -149,6 +155,10 @@ async fn cold_batch_links_slots_in_parallel() { probe.max_concurrent(), rayon::current_num_threads(), ); + let cold_a = key("cold-a", "1.0.0"); + let cold_b = key("cold-b", "1.0.0"); + assert_eq!(output.requires_build_by_snapshot.get(&cold_a), Some(&true)); + assert_eq!(output.requires_build_by_snapshot.get(&cold_b), Some(&false)); } const DUMMY_SHA512: &str = "sha512-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="; diff --git a/pacquet/crates/package-manager/src/install_frozen_lockfile.rs b/pacquet/crates/package-manager/src/install_frozen_lockfile.rs index 039d8fa27e..682ff44012 100644 --- a/pacquet/crates/package-manager/src/install_frozen_lockfile.rs +++ b/pacquet/crates/package-manager/src/install_frozen_lockfile.rs @@ -696,6 +696,7 @@ where let CreateVirtualStoreOutput { package_manifests, side_effects_maps_by_snapshot, + requires_build_by_snapshot, fetch_failed, cas_paths_by_pkg_id, } = { @@ -1064,6 +1065,7 @@ where importers, allow_build_policy: &allow_build_policy, side_effects_maps_by_snapshot: Some(&side_effects_maps_by_snapshot), + requires_build_by_snapshot: Some(&requires_build_by_snapshot), engine_name: engine_name.as_deref(), side_effects_cache: config.side_effects_cache_read(), side_effects_cache_write: config.side_effects_cache_write(), diff --git a/pacquet/crates/package-manager/src/install_with_fresh_lockfile.rs b/pacquet/crates/package-manager/src/install_with_fresh_lockfile.rs index 14dae309b0..ca972b9ec4 100644 --- a/pacquet/crates/package-manager/src/install_with_fresh_lockfile.rs +++ b/pacquet/crates/package-manager/src/install_with_fresh_lockfile.rs @@ -1315,6 +1315,7 @@ impl InstallWithFreshLockfile<'_, DependencyGroupList> { cas_paths: prefetched_cas_paths, manifests: prefetched_manifests, side_effects_maps: _, + requires_build: _, } = prefetch; tracing::info!( target: "pacquet::install::phase", @@ -1429,6 +1430,7 @@ impl InstallWithFreshLockfile<'_, DependencyGroupList> { let CreateVirtualStoreOutput { package_manifests, side_effects_maps_by_snapshot: _, + requires_build_by_snapshot: _, fetch_failed: _, // Populated only under `node_linker == Hoisted`; consumed by // the hoisted-linker pass below to materialize the on-disk diff --git a/pacquet/crates/package-manifest/src/lib.rs b/pacquet/crates/package-manifest/src/lib.rs index 5d23054432..f576de3676 100644 --- a/pacquet/crates/package-manifest/src/lib.rs +++ b/pacquet/crates/package-manifest/src/lib.rs @@ -423,11 +423,19 @@ pub fn safe_read_package_json_from_dir(dir: &Path) -> Result, Pack /// directory. Missing manifests, IO errors, and parse errors all collapse to /// `false` — pacquet cannot meaningfully build a package whose extracted /// content cannot be inspected. +#[must_use] pub fn pkg_requires_build(pkg_root: &Path) -> bool { if pkg_root.join("binding.gyp").exists() || pkg_root.join(".hooks").is_dir() { return true; } let Ok(Some(manifest)) = safe_read_package_json_from_dir(pkg_root) else { return false }; + manifest_requires_build(&manifest) +} + +/// Decide whether a parsed manifest declares lifecycle scripts that +/// make its package a build candidate. +#[must_use] +pub fn manifest_requires_build(manifest: &Value) -> bool { manifest.get("scripts").and_then(Value::as_object).is_some_and(|scripts| { scripts.contains_key("preinstall") || scripts.contains_key("install") @@ -435,5 +443,23 @@ pub fn pkg_requires_build(pkg_root: &Path) -> bool { }) } +/// Decide whether a store-index file key implies build hooks. +#[must_use] +pub fn file_path_requires_build(filename: &str) -> bool { + filename == "binding.gyp" + || filename + .strip_prefix(".hooks") + .is_some_and(|suffix| suffix.starts_with('/') || suffix.starts_with('\\')) +} + +#[must_use] +pub fn files_include_install_scripts(filenames: Filenames) -> bool +where + Filenames: IntoIterator, + Filename: AsRef, +{ + filenames.into_iter().any(|filename| file_path_requires_build(filename.as_ref())) +} + #[cfg(test)] mod tests; diff --git a/pacquet/crates/tarball/Cargo.toml b/pacquet/crates/tarball/Cargo.toml index 6dfd08985f..c159a11a1e 100644 --- a/pacquet/crates/tarball/Cargo.toml +++ b/pacquet/crates/tarball/Cargo.toml @@ -11,11 +11,12 @@ license.workspace = true repository.workspace = true [dependencies] -pacquet-diagnostics = { workspace = true } -pacquet-fs = { workspace = true } -pacquet-network = { workspace = true } -pacquet-reporter = { workspace = true } -pacquet-store-dir = { workspace = true } +pacquet-diagnostics = { workspace = true } +pacquet-fs = { workspace = true } +pacquet-network = { workspace = true } +pacquet-package-manifest = { workspace = true } +pacquet-reporter = { workspace = true } +pacquet-store-dir = { workspace = true } dashmap = { workspace = true } derive_more = { workspace = true } diff --git a/pacquet/crates/tarball/src/lib.rs b/pacquet/crates/tarball/src/lib.rs index fec657ba1f..78ff943c83 100644 --- a/pacquet/crates/tarball/src/lib.rs +++ b/pacquet/crates/tarball/src/lib.rs @@ -12,6 +12,7 @@ use miette::Diagnostic; use pacquet_fs::file_mode; pub use pacquet_network::RetryOpts; use pacquet_network::{AuthHeaders, ThrottledClient, UNPRIORITIZED}; +use pacquet_package_manifest::{files_include_install_scripts, manifest_requires_build}; use pacquet_reporter::{ FetchingProgressLog, FetchingProgressMessage, LogEvent, LogLevel, ProgressLog, ProgressMessage, Reporter, RequestRetryError, RequestRetryLog, @@ -565,6 +566,8 @@ fn extract_tarball_entries( // manifest is captured here too, off the raw payload slice. let mut pending: Vec> = Vec::with_capacity(capacity); let mut manifest = None; + let mut manifest_build_scripts = false; + let mut file_build_hooks = false; for entry in entries { let entry = entry.map_err(TarballError::ReadTarballEntries)?; @@ -651,6 +654,9 @@ fn extract_tarball_entries( { continue; } + if files_include_install_scripts([cleaned_entry_path.as_str()]) { + file_build_hooks = true; + } // Capture the parsed manifest whenever we see `package.json`. // Mirrors pnpm's `bundledManifest` pass-through at // [pnpm/pnpm@4750fd370c]: pnpm stuffs the narrowed manifest @@ -678,12 +684,16 @@ fn extract_tarball_entries( // [`addFilesFromTarball`]: if cleaned_entry_path == "package.json" { match serde_json::from_slice::(entry_data) { - Ok(parsed) => manifest = normalize_bundled_manifest(&parsed), + Ok(parsed) => { + manifest_build_scripts = manifest_requires_build(&parsed); + manifest = normalize_bundled_manifest(&parsed); + } Err(error) => { tracing::debug!( ?error, "package.json in tarball failed to parse as JSON; bundled manifest cleared", ); + manifest_build_scripts = false; manifest = None; } } @@ -739,7 +749,7 @@ fn extract_tarball_entries( let pkg_files_idx = PackageFilesIndex { manifest, - requires_build: None, + requires_build: Some(manifest_build_scripts || file_build_hooks), algo: "sha512".to_string(), files, side_effects: None, @@ -1020,17 +1030,27 @@ pub type PrefetchedManifests = HashMap>; pub type PrefetchedSideEffectsMaps = HashMap>>>; +/// `requiresBuild` flags recovered from the same `index.db` rows as +/// [`PrefetchedCasPaths`]. Missing values in old rows are recomputed +/// from the bundled manifest plus verified file keys, mirroring +/// pnpm's worker fallback when `pkgFilesIndex.requiresBuild` is absent. +pub type PrefetchedRequiresBuild = HashMap; + +type DecodedPrefetchRow = + (String, Option>, Option, pacquet_store_dir::VerifyResult); + /// Output of [`prefetch_cas_paths`]: the warm-cache filesystem map /// plus any bundled manifests and side-effects overlays recovered /// from the same `index.db` rows. Bundled in a single struct so -/// callers can destructure all three after one `await`, rather than -/// the function having to thread three separate `spawn_blocking` +/// callers can destructure all cached facts after one `await`, rather +/// than the function having to thread several separate `spawn_blocking` /// round-trips through. #[derive(Default)] pub struct PrefetchResult { pub cas_paths: PrefetchedCasPaths, pub manifests: PrefetchedManifests, pub side_effects_maps: PrefetchedSideEffectsMaps, + pub requires_build: PrefetchedRequiresBuild, } /// Batch the entire warm-cache lookup phase into one `spawn_blocking` @@ -1124,7 +1144,7 @@ pub async fn prefetch_cas_paths( // `Option::take` so it travels back to the caller without // an intermediate `Value::clone` of the JSON tree — the // verify function only inspects `files`, never `manifest`. - let decoded: Vec<(String, Option>, pacquet_store_dir::VerifyResult)> = raw + let decoded: Vec = raw .into_par_iter() .filter_map(|(cache_key, bytes)| { let mut entry: PackageFilesIndex = match pacquet_store_dir::decode_package_files_index(&bytes) { @@ -1139,6 +1159,7 @@ pub async fn prefetch_cas_paths( return None; } }; + let stored_requires_build = entry.requires_build; let manifest = entry.manifest.take().map(Arc::new); let verify_result = if verify_store_integrity { pacquet_store_dir::check_pkg_files_integrity( @@ -1149,15 +1170,20 @@ pub async fn prefetch_cas_paths( } else { pacquet_store_dir::build_file_maps_from_index(store_dir, entry) }; - Some((cache_key, manifest, verify_result)) + Some((cache_key, manifest, stored_requires_build, verify_result)) }) .collect(); let mut cas_paths = HashMap::with_capacity(decoded.len()); let mut manifests = HashMap::new(); let mut side_effects_maps = HashMap::new(); - for (cache_key, manifest, verify_result) in decoded { + let mut requires_build = HashMap::with_capacity(decoded.len()); + for (cache_key, manifest, stored_requires_build, verify_result) in decoded { if verify_result.passed { + let calculated_requires_build = stored_requires_build.unwrap_or_else(|| { + manifest.as_deref().is_some_and(manifest_requires_build) + || files_include_install_scripts(verify_result.files_map.keys()) + }); if let Some(manifest) = manifest { manifests.insert(cache_key.clone(), manifest); } @@ -1166,10 +1192,11 @@ pub async fn prefetch_cas_paths( { side_effects_maps.insert(cache_key.clone(), Arc::new(maps)); } + requires_build.insert(cache_key.clone(), calculated_requires_build); cas_paths.insert(cache_key, Arc::new(verify_result.files_map)); } } - PrefetchResult { cas_paths, manifests, side_effects_maps } + PrefetchResult { cas_paths, manifests, side_effects_maps, requires_build } }) .await; result.unwrap_or_else(|error| { diff --git a/pacquet/crates/tarball/src/tests.rs b/pacquet/crates/tarball/src/tests.rs index a378b948be..76023e43f2 100644 --- a/pacquet/crates/tarball/src/tests.rs +++ b/pacquet/crates/tarball/src/tests.rs @@ -489,6 +489,56 @@ async fn prefetch_cas_paths_returns_hits_for_live_index_rows() { let map = prefetched.cas_paths.get(&index_key).expect("hit"); assert_eq!(map.get("package.json"), Some(&pkg_json_path)); + assert_eq!(prefetched.requires_build.get(&index_key), Some(&false)); + drop(store_dir); +} + +#[tokio::test] +async fn prefetch_cas_paths_recomputes_requires_build_for_legacy_rows() { + let (store_dir, store_path) = tempdir_with_leaked_path(); + + let manifest_bytes = br#"{"name":"fake","scripts":{"postinstall":"node build.js"}}"#; + let (pkg_json_path, pkg_json_hash) = store_path.write_cas_file(manifest_bytes, false).unwrap(); + + let pkg_integrity = integrity( + "sha512-q/IXcMGuF8v7ZLf/JeYfE/pB4Wg1yxT6jXJz8JxRK7a4mJSXV1QKMXDPfZkvMHTZpYxWBDoJiXtptDWFnoCA2w==", + ); + let pkg_id = "fake@1.0.0"; + let index_key = store_index_key(&pkg_integrity.to_string(), pkg_id); + + let mut files = HashMap::new(); + files.insert( + "package.json".to_string(), + CafsFileInfo { + digest: format!("{pkg_json_hash:x}"), + mode: 0o644, + size: manifest_bytes.len() as u64, + checked_at: None, + }, + ); + let entry = PackageFilesIndex { + manifest: Some(serde_json::from_slice(manifest_bytes).unwrap()), + requires_build: None, + algo: "sha512".to_string(), + files, + side_effects: None, + }; + let index = StoreIndex::open_in(store_path).unwrap(); + index.set(&index_key, &entry).unwrap(); + drop(index); + + let prefetched = prefetch_cas_paths( + StoreIndex::shared_readonly_in(store_path), + store_path, + vec![index_key.clone()], + true, + SharedVerifiedFilesCache::default(), + ) + .await; + + let map = prefetched.cas_paths.get(&index_key).expect("hit"); + assert_eq!(map.get("package.json"), Some(&pkg_json_path)); + assert_eq!(prefetched.requires_build.get(&index_key), Some(&true)); drop(store_dir); } @@ -1018,10 +1068,37 @@ fn extract_tarball_applies_ignore_filter_dropping_entries_from_both_maps() { !pkg_files_idx.files.contains_key("lib/node_modules/npm/package.json"), "ignore filter should drop bundled npm from pkg_files_idx.files", ); + assert_eq!(pkg_files_idx.requires_build, Some(false)); drop(tempdir); } +#[test] +fn extract_tarball_records_requires_build_from_manifest() { + let (tempdir, store_path) = tempdir_with_leaked_path(); + + let mut tar_bytes = Vec::new(); + { + let mut builder = tar::Builder::new(&mut tar_bytes); + let body = br#"{"scripts":{"install":"node-gyp rebuild"}}"#; + let mut header = tar::Header::new_gnu(); + header.set_size(body.len() as u64); + header.set_mode(0o644); + header.set_entry_type(tar::EntryType::Regular); + header.set_cksum(); + builder + .append_data(&mut header, "package/package.json", &body[..]) + .expect("append manifest"); + builder.finish().expect("finalize tar"); + } + + let (_cas_paths, pkg_files_idx) = + extract_tarball_entries(&tar_bytes, store_path, None).expect("tarball extraction"); + + assert_eq!(pkg_files_idx.requires_build, Some(true)); + drop(tempdir); +} + /// `RetryOpts::default()` reproduces pnpm's /// `network/fetch/src/fetch.ts` defaults: 2 retries, factor 10, /// minTimeout 10 s, maxTimeout 60 s. The first post-failure delay