perf(pacquet): cache build metadata during installs (#12360)

* perf(pacquet): cache build metadata during installs

* fix(pacquet): satisfy clippy for build helpers

* fix(pacquet): reduce prefetch row type complexity
This commit is contained in:
Zoltan Kochan
2026-06-12 17:27:13 +02:00
committed by GitHub
parent 2aa2eaa6ff
commit 43b5bf7520
11 changed files with 286 additions and 38 deletions

1
Cargo.lock generated
View File

@@ -4366,6 +4366,7 @@ dependencies = [
"pacquet-diagnostics",
"pacquet-fs",
"pacquet-network",
"pacquet-package-manifest",
"pacquet-reporter",
"pacquet-store-dir",
"pipe-trait",

View File

@@ -300,6 +300,10 @@ pub struct BuildModules<'a> {
/// disabled or no rows were prefetched; the gate falls through
/// to "rebuild" for every snapshot.
pub side_effects_maps_by_snapshot: Option<&'a crate::SideEffectsMapsBySnapshot>,
/// Per-snapshot `requiresBuild` values from the warm-cache
/// prefetch. Missing entries fall back to inspecting the
/// materialized package directory.
pub requires_build_by_snapshot: Option<&'a crate::RequiresBuildBySnapshot>,
/// `<platform>;<arch>;node<major>` — the prefix part of
/// upstream's dep-state cache key. Computed once at install
/// start by [`pacquet_graph_hasher::detect_node_major`] +
@@ -421,6 +425,7 @@ impl BuildModules<'_> {
importers,
allow_build_policy,
side_effects_maps_by_snapshot,
requires_build_by_snapshot,
engine_name,
side_effects_cache,
side_effects_cache_write,
@@ -440,14 +445,9 @@ impl BuildModules<'_> {
let extra_env = HashMap::new();
// Compute requires_build per snapshot from each extracted package
// directory. Mirrors upstream where the worker computes
// `node.requiresBuild` from the package's manifest scripts and the
// presence of `binding.gyp` / `.hooks/` after extraction
// (`https://github.com/pnpm/pnpm/blob/80037699fb/building/pkg-requires-build/src/index.ts`).
// Pacquet does this here rather than in a worker because the worker
// does not exist yet — it is the same per-package on-disk inspection,
// moved to the build entry point.
// Compute `requiresBuild` per snapshot. Warm store-index rows
// already carry the upstream worker's answer, so only misses
// need to inspect the materialized package directory.
let requires_build_map: HashMap<PackageKey, bool> = snapshots
.keys()
// Skip snapshots that never landed on disk. `pkg_requires_build`
@@ -457,14 +457,15 @@ impl BuildModules<'_> {
// optional fan-out.
.filter(|key| !skipped.contains(key))
.map(|key| {
// Hoisted snapshots without a recorded `pkgRoot` (the
// walker dropped them) get `requires_build = false`
// so they fall through both the script-runner and the
// patch-apply gates without a syscall, matching the
// isolated path's `pkg_dir.exists() == false` skip.
let requires = pkg_root_for_key(layout, pkg_root_by_key, key)
.as_deref()
.is_some_and(pkg_requires_build);
let pkg_root = pkg_root_for_key(layout, pkg_root_by_key, key);
let requires = match (
pkg_root.as_deref(),
requires_build_by_snapshot.and_then(|map| map.get(key).copied()),
) {
(None, _) => false,
(_, Some(requires)) => requires,
(Some(pkg_root), None) => pkg_requires_build(pkg_root),
};
(key.clone(), requires)
})
.collect();

View File

@@ -1,5 +1,5 @@
use super::{AllowBuildPolicy, BuildModules, parse_name_version_from_key};
use crate::{SkippedSnapshots, VirtualStoreLayout};
use crate::{RequiresBuildBySnapshot, SkippedSnapshots, VirtualStoreLayout};
use pacquet_config::Config;
use pacquet_executor::ScriptsPrependNodePath;
use pacquet_lockfile::{
@@ -332,6 +332,7 @@ fn build_modules_collects_ignored_builds() {
packages: None,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: true,
side_effects_cache_write: false,
@@ -358,6 +359,54 @@ fn build_modules_collects_ignored_builds() {
);
}
#[test]
fn cached_requires_build_false_skips_package_dir_probe() {
let pkg_key = key("aaa", "1.0.0");
let snapshots = HashMap::from([(pkg_key.clone(), SnapshotEntry::default())]);
let importers = root_importers(&[("aaa", "1.0.0")]);
let policy = AllowBuildPolicy::default();
let virtual_store_dir = tempdir().expect("create temp dir");
let modules_dir = tempdir().expect("create temp dir");
let lockfile_dir = tempdir().expect("create temp dir");
create_buildable_pkg(virtual_store_dir.path(), &pkg_key);
let requires_build_by_snapshot = RequiresBuildBySnapshot::from([(pkg_key, false)]);
let ignored = BuildModules {
layout: &VirtualStoreLayout::legacy(
virtual_store_dir.path(),
pacquet_config::default_virtual_store_dir_max_length() as usize,
),
modules_dir: modules_dir.path(),
lockfile_dir: lockfile_dir.path(),
snapshots: Some(&snapshots),
importers: &importers,
packages: None,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: Some(&requires_build_by_snapshot),
engine_name: None,
side_effects_cache: true,
side_effects_cache_write: false,
store_dir: None,
store_index_writer: None,
patches: None,
scripts_prepend_node_path: ScriptsPrependNodePath::Never,
unsafe_perm: true,
child_concurrency: 1,
skipped: &SkippedSnapshots::default(),
pkg_root_by_key: None,
gather_ancestor_bin_paths: false,
frozen_store: false,
}
.run::<SilentReporter>()
.expect("run BuildModules");
assert!(ignored.is_empty());
}
/// Parallel-path variant of [`build_modules_collects_ignored_builds`]
/// running under `child_concurrency: 2` so the rayon
/// `chunk.par_iter().try_for_each(...)` dispatch is actually
@@ -405,6 +454,7 @@ fn build_modules_collects_ignored_builds_under_concurrency() {
packages: None,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: true,
side_effects_cache_write: false,
@@ -466,6 +516,7 @@ fn build_modules_excludes_explicit_deny_from_ignored() {
packages: None,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: true,
side_effects_cache_write: false,
@@ -550,6 +601,7 @@ fn do_not_fail_on_optional_dep_with_failing_postinstall() {
packages: None,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: true,
side_effects_cache_write: false,
@@ -689,6 +741,7 @@ fn using_side_effects_cache_skips_rebuild() {
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: Some(&side_effects_maps),
requires_build_by_snapshot: None,
engine_name: Some(engine),
side_effects_cache: true,
side_effects_cache_write: false,
@@ -757,6 +810,7 @@ fn side_effects_cache_disabled_bypasses_the_gate() {
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: Some(&side_effects_maps),
requires_build_by_snapshot: None,
engine_name: Some("darwin;arm64;node20"),
side_effects_cache: false,
side_effects_cache_write: false,
@@ -818,6 +872,7 @@ fn fail_when_failing_postinstall_is_required() {
packages: None,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: true,
side_effects_cache_write: false,
@@ -901,6 +956,7 @@ fn frozen_backstop_run(
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: false,
side_effects_cache_write: false,
@@ -1219,6 +1275,7 @@ async fn write_path_populates_side_effects_row() {
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: Some(engine),
side_effects_cache: true,
side_effects_cache_write: true,
@@ -1334,6 +1391,7 @@ async fn write_path_disabled_skips_upload() {
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: Some("darwin;arm64;node20"),
side_effects_cache: true,
side_effects_cache_write: false,
@@ -1457,6 +1515,7 @@ async fn upload_error_does_not_interrupt_install() {
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: Some("darwin;arm64;node20"),
side_effects_cache: true,
side_effects_cache_write: true,
@@ -1691,6 +1750,7 @@ new file mode 100644
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: Some(engine),
side_effects_cache: true,
side_effects_cache_write: true,
@@ -1801,6 +1861,7 @@ new file mode 100644
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: false,
side_effects_cache_write: false,
@@ -1882,6 +1943,7 @@ async fn missing_patch_file_path_errors_with_diagnostic() {
importers: &importers,
allow_build_policy: &policy,
side_effects_maps_by_snapshot: None,
requires_build_by_snapshot: None,
engine_name: None,
side_effects_cache: false,
side_effects_cache_write: false,

View File

@@ -12,6 +12,7 @@ use pacquet_lockfile::{
SnapshotEntry, select_platform_variant,
};
use pacquet_network::ThrottledClient;
use pacquet_package_manifest::{files_include_install_scripts, manifest_requires_build};
use pacquet_reporter::{
BrokenModulesLog, LogEvent, LogLevel, ProgressLog, ProgressMessage, Reporter, StatsLog,
StatsMessage,
@@ -24,6 +25,7 @@ use pacquet_tarball::{MemCache, PrefetchResult, SharedReportedProgressKeys, pref
use pipe_trait::Pipe;
use std::{
collections::{HashMap, HashSet},
fs,
path::{Path, PathBuf},
sync::atomic::AtomicU8,
};
@@ -65,6 +67,11 @@ pub type PackageManifests = HashMap<PkgNameVerPeer, std::sync::Arc<serde_json::V
pub type SideEffectsMapsBySnapshot =
HashMap<PackageKey, std::sync::Arc<HashMap<String, HashMap<String, PathBuf>>>>;
/// Per-snapshot `requiresBuild` flags recovered from the store index
/// during the warm-cache prefetch. `BuildModules` consumes this to
/// avoid re-inspecting every package directory after materialization.
pub type RequiresBuildBySnapshot = HashMap<PackageKey, bool>;
/// Output of [`CreateVirtualStore::run`]. Bundles the bin-link
/// manifest cache, the per-snapshot side-effects-cache overlays the
/// build-phase needs, and the per-install fetch-failure set.
@@ -80,6 +87,7 @@ pub type SideEffectsMapsBySnapshot =
pub struct CreateVirtualStoreOutput {
pub package_manifests: PackageManifests,
pub side_effects_maps_by_snapshot: SideEffectsMapsBySnapshot,
pub requires_build_by_snapshot: RequiresBuildBySnapshot,
pub fetch_failed: HashSet<PackageKey>,
/// Per-package CAS index, populated only when
/// [`CreateVirtualStore::node_linker`] is
@@ -233,6 +241,7 @@ impl CreateVirtualStore<'_> {
return Ok(CreateVirtualStoreOutput {
package_manifests: PackageManifests::new(),
side_effects_maps_by_snapshot: SideEffectsMapsBySnapshot::new(),
requires_build_by_snapshot: RequiresBuildBySnapshot::new(),
fetch_failed: HashSet::new(),
cas_paths_by_pkg_id: is_hoisted.then(CasPathsByPkgId::new),
});
@@ -540,6 +549,7 @@ impl CreateVirtualStore<'_> {
cas_paths: prefetched,
manifests: prefetched_manifests,
side_effects_maps: prefetched_side_effects,
requires_build: prefetched_requires_build,
} = prefetch_cas_paths(
store_index.clone(),
store_dir,
@@ -595,6 +605,8 @@ impl CreateVirtualStore<'_> {
HashMap::with_capacity(prefetched_manifests.len());
let mut side_effects_maps_by_snapshot: SideEffectsMapsBySnapshot =
HashMap::with_capacity(prefetched_side_effects.len());
let mut requires_build_by_snapshot: RequiresBuildBySnapshot =
HashMap::with_capacity(prefetched_requires_build.len());
// First pass: process *skipped* snapshots into the bin-
// manifest cache and the side-effects map. They don't enter
@@ -617,6 +629,11 @@ impl CreateVirtualStore<'_> {
side_effects_maps_by_snapshot
.insert((*snapshot_key).clone(), std::sync::Arc::clone(maps));
}
if let Some(cache_key) = cache_key.as_deref()
&& let Some(&requires_build) = prefetched_requires_build.get(cache_key)
{
requires_build_by_snapshot.insert((*snapshot_key).clone(), requires_build);
}
}
// Second pass: survivors. Same loop as above plus the
@@ -638,6 +655,11 @@ impl CreateVirtualStore<'_> {
side_effects_maps_by_snapshot
.insert((*snapshot_key).clone(), std::sync::Arc::clone(maps));
}
if let Some(cache_key) = cache_key.as_deref()
&& let Some(&requires_build) = prefetched_requires_build.get(cache_key)
{
requires_build_by_snapshot.insert((*snapshot_key).clone(), requires_build);
}
// Carry the cache key alongside the warm entry so the
// reporter can skip a duplicate package-status event when
// a resolve-time prefetch already emitted it.
@@ -745,14 +767,14 @@ impl CreateVirtualStore<'_> {
// future returns; under hoisted no slot was written and the
// CAS index is the only output.
let mut fetch_failed: HashSet<PackageKey> = HashSet::new();
let mut cold_cas_paths: Vec<(&PackageKey, &SnapshotEntry, HashMap<String, PathBuf>)> =
let mut cold_cas_paths: Vec<(&PackageKey, &SnapshotEntry, HashMap<String, PathBuf>, bool)> =
Vec::new();
if !cold.is_empty() {
let prefetched_ref = Some(&prefetched);
let verified_files_cache_ref = &verified_files_cache;
type ColdOutcome<'a> = (
Option<PackageKey>,
Option<(&'a PackageKey, &'a SnapshotEntry, HashMap<String, PathBuf>)>,
Option<(&'a PackageKey, &'a SnapshotEntry, HashMap<String, PathBuf>, bool)>,
);
let outcomes: Vec<ColdOutcome<'_>> = cold
.iter()
@@ -793,7 +815,10 @@ impl CreateVirtualStore<'_> {
.run::<Reporter>()
.await;
match result {
Ok(cas_paths) => Ok((None, Some((*snapshot_key, *snapshot, cas_paths)))),
Ok(cas_paths) => {
let requires_build = requires_build_from_cas_paths(&cas_paths);
Ok((None, Some((*snapshot_key, *snapshot, cas_paths, requires_build))))
}
Err(err) if snapshot.optional && is_fetch_side_failure(&err) => {
// Silent swallow, matching upstream. `tracing::warn!`
// gives operator visibility without polluting
@@ -832,6 +857,7 @@ impl CreateVirtualStore<'_> {
fetch_failed.insert(key);
}
if let Some(captured) = captured {
requires_build_by_snapshot.insert((*captured.0).clone(), captured.3);
cold_cas_paths.push(captured);
}
}
@@ -849,7 +875,7 @@ impl CreateVirtualStore<'_> {
if !is_hoisted && !cold_cas_paths.is_empty() {
let cold_slots: Vec<SlotLink<'_>> = cold_cas_paths
.iter()
.map(|(snapshot_key, snapshot, cas_paths)| SlotLink {
.map(|(snapshot_key, snapshot, cas_paths, _requires_build)| SlotLink {
snapshot_key,
snapshot,
cas_paths,
@@ -901,7 +927,7 @@ impl CreateVirtualStore<'_> {
// real install.
if let Some(map) = cas_paths_by_pkg_id.as_mut() {
map.reserve(cold_cas_paths.len());
for (snapshot_key, _snapshot, paths) in cold_cas_paths {
for (snapshot_key, _snapshot, paths, _requires_build) in cold_cas_paths {
// Mirrors upstream's `getPkgIdWithPatchHash` — strip
// the peer-graph suffix but keep `(patch_hash=...)` so
// patched packages share one CAS-paths entry across
@@ -922,12 +948,25 @@ impl CreateVirtualStore<'_> {
Ok(CreateVirtualStoreOutput {
package_manifests,
side_effects_maps_by_snapshot,
requires_build_by_snapshot,
fetch_failed,
cas_paths_by_pkg_id,
})
}
}
fn requires_build_from_cas_paths(cas_paths: &HashMap<String, PathBuf>) -> bool {
if files_include_install_scripts(cas_paths.keys()) {
return true;
}
let Some(package_json) = cas_paths.get("package.json") else { return false };
let Ok(contents) = fs::read_to_string(package_json) else { return false };
let Ok(manifest) = serde_json::from_str::<serde_json::Value>(&contents) else {
return false;
};
manifest_requires_build(&manifest)
}
struct SlotLink<'a> {
snapshot_key: &'a PackageKey,
snapshot: &'a SnapshotEntry,

View File

@@ -83,8 +83,14 @@ async fn cold_batch_links_slots_in_parallel() {
let source_dir = workspace_root.join("prefetched").join(package_name);
fs::create_dir_all(&source_dir).expect("create prefetched package dir");
let manifest_path = source_dir.join("package.json");
fs::write(&manifest_path, format!(r#"{{"name":"{package_name}","version":"1.0.0"}}"#))
.expect("write package manifest");
let manifest = if package_name == "cold-a" {
format!(
r#"{{"name":"{package_name}","version":"1.0.0","scripts":{{"postinstall":"node build.js"}}}}"#,
)
} else {
format!(r#"{{"name":"{package_name}","version":"1.0.0"}}"#)
};
fs::write(&manifest_path, manifest).expect("write package manifest");
let index_path = source_dir.join("index.js");
fs::write(&index_path, "module.exports = true\n").expect("write package body");
@@ -117,7 +123,7 @@ async fn cold_batch_links_slots_in_parallel() {
let probe =
crate::create_virtual_dir_by_snapshot::tests::LinkConcurrencyProbe::waiting_for_overlap();
CreateVirtualStore {
let output = CreateVirtualStore {
http_client: &pacquet_network::ThrottledClient::default(),
config,
packages: Some(&packages),
@@ -149,6 +155,10 @@ async fn cold_batch_links_slots_in_parallel() {
probe.max_concurrent(),
rayon::current_num_threads(),
);
let cold_a = key("cold-a", "1.0.0");
let cold_b = key("cold-b", "1.0.0");
assert_eq!(output.requires_build_by_snapshot.get(&cold_a), Some(&true));
assert_eq!(output.requires_build_by_snapshot.get(&cold_b), Some(&false));
}
const DUMMY_SHA512: &str = "sha512-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA==";

View File

@@ -696,6 +696,7 @@ where
let CreateVirtualStoreOutput {
package_manifests,
side_effects_maps_by_snapshot,
requires_build_by_snapshot,
fetch_failed,
cas_paths_by_pkg_id,
} = {
@@ -1064,6 +1065,7 @@ where
importers,
allow_build_policy: &allow_build_policy,
side_effects_maps_by_snapshot: Some(&side_effects_maps_by_snapshot),
requires_build_by_snapshot: Some(&requires_build_by_snapshot),
engine_name: engine_name.as_deref(),
side_effects_cache: config.side_effects_cache_read(),
side_effects_cache_write: config.side_effects_cache_write(),

View File

@@ -1315,6 +1315,7 @@ impl<DependencyGroupList> InstallWithFreshLockfile<'_, DependencyGroupList> {
cas_paths: prefetched_cas_paths,
manifests: prefetched_manifests,
side_effects_maps: _,
requires_build: _,
} = prefetch;
tracing::info!(
target: "pacquet::install::phase",
@@ -1429,6 +1430,7 @@ impl<DependencyGroupList> InstallWithFreshLockfile<'_, DependencyGroupList> {
let CreateVirtualStoreOutput {
package_manifests,
side_effects_maps_by_snapshot: _,
requires_build_by_snapshot: _,
fetch_failed: _,
// Populated only under `node_linker == Hoisted`; consumed by
// the hoisted-linker pass below to materialize the on-disk

View File

@@ -423,11 +423,19 @@ pub fn safe_read_package_json_from_dir(dir: &Path) -> Result<Option<Value>, Pack
/// directory. Missing manifests, IO errors, and parse errors all collapse to
/// `false` — pacquet cannot meaningfully build a package whose extracted
/// content cannot be inspected.
#[must_use]
pub fn pkg_requires_build(pkg_root: &Path) -> bool {
if pkg_root.join("binding.gyp").exists() || pkg_root.join(".hooks").is_dir() {
return true;
}
let Ok(Some(manifest)) = safe_read_package_json_from_dir(pkg_root) else { return false };
manifest_requires_build(&manifest)
}
/// Decide whether a parsed manifest declares lifecycle scripts that
/// make its package a build candidate.
#[must_use]
pub fn manifest_requires_build(manifest: &Value) -> bool {
manifest.get("scripts").and_then(Value::as_object).is_some_and(|scripts| {
scripts.contains_key("preinstall")
|| scripts.contains_key("install")
@@ -435,5 +443,23 @@ pub fn pkg_requires_build(pkg_root: &Path) -> bool {
})
}
/// Decide whether a store-index file key implies build hooks.
#[must_use]
pub fn file_path_requires_build(filename: &str) -> bool {
filename == "binding.gyp"
|| filename
.strip_prefix(".hooks")
.is_some_and(|suffix| suffix.starts_with('/') || suffix.starts_with('\\'))
}
#[must_use]
pub fn files_include_install_scripts<Filenames, Filename>(filenames: Filenames) -> bool
where
Filenames: IntoIterator<Item = Filename>,
Filename: AsRef<str>,
{
filenames.into_iter().any(|filename| file_path_requires_build(filename.as_ref()))
}
#[cfg(test)]
mod tests;

View File

@@ -11,11 +11,12 @@ license.workspace = true
repository.workspace = true
[dependencies]
pacquet-diagnostics = { workspace = true }
pacquet-fs = { workspace = true }
pacquet-network = { workspace = true }
pacquet-reporter = { workspace = true }
pacquet-store-dir = { workspace = true }
pacquet-diagnostics = { workspace = true }
pacquet-fs = { workspace = true }
pacquet-network = { workspace = true }
pacquet-package-manifest = { workspace = true }
pacquet-reporter = { workspace = true }
pacquet-store-dir = { workspace = true }
dashmap = { workspace = true }
derive_more = { workspace = true }

View File

@@ -12,6 +12,7 @@ use miette::Diagnostic;
use pacquet_fs::file_mode;
pub use pacquet_network::RetryOpts;
use pacquet_network::{AuthHeaders, ThrottledClient, UNPRIORITIZED};
use pacquet_package_manifest::{files_include_install_scripts, manifest_requires_build};
use pacquet_reporter::{
FetchingProgressLog, FetchingProgressMessage, LogEvent, LogLevel, ProgressLog, ProgressMessage,
Reporter, RequestRetryError, RequestRetryLog,
@@ -565,6 +566,8 @@ fn extract_tarball_entries(
// manifest is captured here too, off the raw payload slice.
let mut pending: Vec<PendingFile<'_>> = Vec::with_capacity(capacity);
let mut manifest = None;
let mut manifest_build_scripts = false;
let mut file_build_hooks = false;
for entry in entries {
let entry = entry.map_err(TarballError::ReadTarballEntries)?;
@@ -651,6 +654,9 @@ fn extract_tarball_entries(
{
continue;
}
if files_include_install_scripts([cleaned_entry_path.as_str()]) {
file_build_hooks = true;
}
// Capture the parsed manifest whenever we see `package.json`.
// Mirrors pnpm's `bundledManifest` pass-through at
// [pnpm/pnpm@4750fd370c]: pnpm stuffs the narrowed manifest
@@ -678,12 +684,16 @@ fn extract_tarball_entries(
// [`addFilesFromTarball`]: <https://github.com/pnpm/pnpm/blob/4750fd370c/store/cafs/src/addFilesFromTarball.ts#L41-L43>
if cleaned_entry_path == "package.json" {
match serde_json::from_slice::<serde_json::Value>(entry_data) {
Ok(parsed) => manifest = normalize_bundled_manifest(&parsed),
Ok(parsed) => {
manifest_build_scripts = manifest_requires_build(&parsed);
manifest = normalize_bundled_manifest(&parsed);
}
Err(error) => {
tracing::debug!(
?error,
"package.json in tarball failed to parse as JSON; bundled manifest cleared",
);
manifest_build_scripts = false;
manifest = None;
}
}
@@ -739,7 +749,7 @@ fn extract_tarball_entries(
let pkg_files_idx = PackageFilesIndex {
manifest,
requires_build: None,
requires_build: Some(manifest_build_scripts || file_build_hooks),
algo: "sha512".to_string(),
files,
side_effects: None,
@@ -1020,17 +1030,27 @@ pub type PrefetchedManifests = HashMap<String, Arc<serde_json::Value>>;
pub type PrefetchedSideEffectsMaps =
HashMap<String, Arc<HashMap<String, HashMap<String, PathBuf>>>>;
/// `requiresBuild` flags recovered from the same `index.db` rows as
/// [`PrefetchedCasPaths`]. Missing values in old rows are recomputed
/// from the bundled manifest plus verified file keys, mirroring
/// pnpm's worker fallback when `pkgFilesIndex.requiresBuild` is absent.
pub type PrefetchedRequiresBuild = HashMap<String, bool>;
type DecodedPrefetchRow =
(String, Option<Arc<serde_json::Value>>, Option<bool>, pacquet_store_dir::VerifyResult);
/// Output of [`prefetch_cas_paths`]: the warm-cache filesystem map
/// plus any bundled manifests and side-effects overlays recovered
/// from the same `index.db` rows. Bundled in a single struct so
/// callers can destructure all three after one `await`, rather than
/// the function having to thread three separate `spawn_blocking`
/// callers can destructure all cached facts after one `await`, rather
/// than the function having to thread several separate `spawn_blocking`
/// round-trips through.
#[derive(Default)]
pub struct PrefetchResult {
pub cas_paths: PrefetchedCasPaths,
pub manifests: PrefetchedManifests,
pub side_effects_maps: PrefetchedSideEffectsMaps,
pub requires_build: PrefetchedRequiresBuild,
}
/// Batch the entire warm-cache lookup phase into one `spawn_blocking`
@@ -1124,7 +1144,7 @@ pub async fn prefetch_cas_paths(
// `Option::take` so it travels back to the caller without
// an intermediate `Value::clone` of the JSON tree — the
// verify function only inspects `files`, never `manifest`.
let decoded: Vec<(String, Option<Arc<serde_json::Value>>, pacquet_store_dir::VerifyResult)> = raw
let decoded: Vec<DecodedPrefetchRow> = raw
.into_par_iter()
.filter_map(|(cache_key, bytes)| {
let mut entry: PackageFilesIndex = match pacquet_store_dir::decode_package_files_index(&bytes) {
@@ -1139,6 +1159,7 @@ pub async fn prefetch_cas_paths(
return None;
}
};
let stored_requires_build = entry.requires_build;
let manifest = entry.manifest.take().map(Arc::new);
let verify_result = if verify_store_integrity {
pacquet_store_dir::check_pkg_files_integrity(
@@ -1149,15 +1170,20 @@ pub async fn prefetch_cas_paths(
} else {
pacquet_store_dir::build_file_maps_from_index(store_dir, entry)
};
Some((cache_key, manifest, verify_result))
Some((cache_key, manifest, stored_requires_build, verify_result))
})
.collect();
let mut cas_paths = HashMap::with_capacity(decoded.len());
let mut manifests = HashMap::new();
let mut side_effects_maps = HashMap::new();
for (cache_key, manifest, verify_result) in decoded {
let mut requires_build = HashMap::with_capacity(decoded.len());
for (cache_key, manifest, stored_requires_build, verify_result) in decoded {
if verify_result.passed {
let calculated_requires_build = stored_requires_build.unwrap_or_else(|| {
manifest.as_deref().is_some_and(manifest_requires_build)
|| files_include_install_scripts(verify_result.files_map.keys())
});
if let Some(manifest) = manifest {
manifests.insert(cache_key.clone(), manifest);
}
@@ -1166,10 +1192,11 @@ pub async fn prefetch_cas_paths(
{
side_effects_maps.insert(cache_key.clone(), Arc::new(maps));
}
requires_build.insert(cache_key.clone(), calculated_requires_build);
cas_paths.insert(cache_key, Arc::new(verify_result.files_map));
}
}
PrefetchResult { cas_paths, manifests, side_effects_maps }
PrefetchResult { cas_paths, manifests, side_effects_maps, requires_build }
})
.await;
result.unwrap_or_else(|error| {

View File

@@ -489,6 +489,56 @@ async fn prefetch_cas_paths_returns_hits_for_live_index_rows() {
let map = prefetched.cas_paths.get(&index_key).expect("hit");
assert_eq!(map.get("package.json"), Some(&pkg_json_path));
assert_eq!(prefetched.requires_build.get(&index_key), Some(&false));
drop(store_dir);
}
#[tokio::test]
async fn prefetch_cas_paths_recomputes_requires_build_for_legacy_rows() {
let (store_dir, store_path) = tempdir_with_leaked_path();
let manifest_bytes = br#"{"name":"fake","scripts":{"postinstall":"node build.js"}}"#;
let (pkg_json_path, pkg_json_hash) = store_path.write_cas_file(manifest_bytes, false).unwrap();
let pkg_integrity = integrity(
"sha512-q/IXcMGuF8v7ZLf/JeYfE/pB4Wg1yxT6jXJz8JxRK7a4mJSXV1QKMXDPfZkvMHTZpYxWBDoJiXtptDWFnoCA2w==",
);
let pkg_id = "fake@1.0.0";
let index_key = store_index_key(&pkg_integrity.to_string(), pkg_id);
let mut files = HashMap::new();
files.insert(
"package.json".to_string(),
CafsFileInfo {
digest: format!("{pkg_json_hash:x}"),
mode: 0o644,
size: manifest_bytes.len() as u64,
checked_at: None,
},
);
let entry = PackageFilesIndex {
manifest: Some(serde_json::from_slice(manifest_bytes).unwrap()),
requires_build: None,
algo: "sha512".to_string(),
files,
side_effects: None,
};
let index = StoreIndex::open_in(store_path).unwrap();
index.set(&index_key, &entry).unwrap();
drop(index);
let prefetched = prefetch_cas_paths(
StoreIndex::shared_readonly_in(store_path),
store_path,
vec![index_key.clone()],
true,
SharedVerifiedFilesCache::default(),
)
.await;
let map = prefetched.cas_paths.get(&index_key).expect("hit");
assert_eq!(map.get("package.json"), Some(&pkg_json_path));
assert_eq!(prefetched.requires_build.get(&index_key), Some(&true));
drop(store_dir);
}
@@ -1018,10 +1068,37 @@ fn extract_tarball_applies_ignore_filter_dropping_entries_from_both_maps() {
!pkg_files_idx.files.contains_key("lib/node_modules/npm/package.json"),
"ignore filter should drop bundled npm from pkg_files_idx.files",
);
assert_eq!(pkg_files_idx.requires_build, Some(false));
drop(tempdir);
}
#[test]
fn extract_tarball_records_requires_build_from_manifest() {
let (tempdir, store_path) = tempdir_with_leaked_path();
let mut tar_bytes = Vec::new();
{
let mut builder = tar::Builder::new(&mut tar_bytes);
let body = br#"{"scripts":{"install":"node-gyp rebuild"}}"#;
let mut header = tar::Header::new_gnu();
header.set_size(body.len() as u64);
header.set_mode(0o644);
header.set_entry_type(tar::EntryType::Regular);
header.set_cksum();
builder
.append_data(&mut header, "package/package.json", &body[..])
.expect("append manifest");
builder.finish().expect("finalize tar");
}
let (_cas_paths, pkg_files_idx) =
extract_tarball_entries(&tar_bytes, store_path, None).expect("tarball extraction");
assert_eq!(pkg_files_idx.requires_build, Some(true));
drop(tempdir);
}
/// `RetryOpts::default()` reproduces pnpm's
/// `network/fetch/src/fetch.ts` defaults: 2 retries, factor 10,
/// minTimeout 10 s, maxTimeout 60 s. The first post-failure delay