From f2db87c0e3546ce6230d2b102fc7f834fff75b9c Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Sat, 16 May 2026 11:35:48 +0200 Subject: [PATCH] fix(pacquet/directory-fetcher,pacquet/package-manager): address CodeRabbit review on #11678 Three parity / hardening fixes flagged in the CodeRabbit review on PR #11678: 1. **Carve out directory snapshots from the current-lockfile-skip gate** (`create_virtual_store.rs`). Directory-typed snapshots have `integrity() == None`; without the carve-out `integrity_equal` collapsed `None == None == true` and the skip filter dropped the snapshot whenever a slot for it existed on disk, so a second install never re-walked the (mutable) source dir. Mirrors pnpm's `!isDirectoryDep` clause in `depIsPresent` at . 2. **Add `DirectoryFetch` to `is_fetch_side_failure`** (`create_virtual_store.rs`). Upstream's catch at [`lockfileToDepGraph.ts:286-298`](https://github.com/pnpm/pnpm/blob/94240bc046/deps/graph-builder/src/lockfileToDepGraph.ts#L286-L298) wraps the whole `fetchPackage` dispatch, so directory-fetcher errors on optional snapshots are swallowed uniformly with tarball / git fetch errors. Without this, an optional injected-directory dep whose source was missing would hard-fail the install instead of being dropped. 3. **Symlink-cycle guard in `walk_all_inner`** (`directory-fetcher/ src/walker.rs`). A `loop -> .` (or any ancestor-pointing) symlink previously sank the walker into infinite recursion until either ENAMETOOLONG or stack overflow fired. Skip-on-revisit keyed off `fs::canonicalize`, matching the pattern `pacquet_git_fetcher::packlist` already uses for `bundleDependencies` cycles. Pnpm's directory-fetcher has the same vulnerability; the guard is a defensible divergence because the positive-case behavior is identical to pnpm and the cycle case degrades from "crash" to "skip with a `tracing::warn`". Added a regression test (`walk_all_files_terminates_on_symlink_cycle`) that points a `loop -> root` symlink at the walk root and asserts the cycle guard short-circuits before any `loop/` descendant is recorded. --- Written by an agent (Claude Code, claude-opus-4-7). --- .../crates/directory-fetcher/src/walker.rs | 30 +++++++++++++++++-- .../directory-fetcher/src/walker/tests.rs | 28 +++++++++++++++++ .../src/create_virtual_store.rs | 29 ++++++++++++++++-- 3 files changed, 82 insertions(+), 5 deletions(-) diff --git a/pacquet/crates/directory-fetcher/src/walker.rs b/pacquet/crates/directory-fetcher/src/walker.rs index 805a310d2e..cef3ed468a 100644 --- a/pacquet/crates/directory-fetcher/src/walker.rs +++ b/pacquet/crates/directory-fetcher/src/walker.rs @@ -16,7 +16,7 @@ use crate::error::DirectoryFetcherError; use pacquet_package_manifest::safe_read_package_json_from_dir; use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fs::{self, Metadata}, io, path::{Path, PathBuf}, @@ -39,7 +39,8 @@ pub(crate) fn walk_all_files( resolve_symlinks: bool, ) -> Result { let mut out = FilesMap::new(); - walk_all_inner(dir, "", resolve_symlinks, &mut out)?; + let mut visited = HashSet::new(); + walk_all_inner(dir, "", resolve_symlinks, &mut visited, &mut out)?; Ok(out) } @@ -47,8 +48,31 @@ fn walk_all_inner( dir: &Path, rel_prefix: &str, resolve_symlinks: bool, + visited: &mut HashSet, out: &mut FilesMap, ) -> Result<(), DirectoryFetcherError> { + // Symlink-cycle guard. Pnpm's directory-fetcher recurses without + // a visited-set so a `foo -> .` (or any ancestor-pointing + // symlink) sinks the whole walk into infinite recursion until the + // path exceeds OS limits and `read_dir` finally errors with + // ENAMETOOLONG. Stack overflow is also reachable on platforms + // where the path-too-long error has a higher ceiling than the + // default Rust stack. Skip-on-revisit instead, matching the + // pattern `pacquet_git_fetcher::packlist` already uses for + // `bundleDependencies` cycles. The check is keyed off + // `fs::canonicalize` so an unresolved symlink and its target + // share one entry; canonicalisation failure (permission denied, + // for example) falls back to the raw path so the guard still + // catches identity loops. + let canonical = fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf()); + if !visited.insert(canonical) { + tracing::warn!( + target: "pacquet::directory_fetcher", + dir = %dir.display(), + "symlink cycle: directory already visited at this canonical path; skipping", + ); + return Ok(()); + } let entries = fs::read_dir(dir) .map_err(|source| DirectoryFetcherError::Io { dir: dir.display().to_string(), source })?; for entry in entries { @@ -78,7 +102,7 @@ fn walk_all_inner( format!("{rel_prefix}/{file_name_str}") }; if resolved.metadata.is_dir() { - walk_all_inner(&resolved.path, &rel, resolve_symlinks, out)?; + walk_all_inner(&resolved.path, &rel, resolve_symlinks, visited, out)?; } else { out.insert(rel, resolved.path); } diff --git a/pacquet/crates/directory-fetcher/src/walker/tests.rs b/pacquet/crates/directory-fetcher/src/walker/tests.rs index 676495841b..0f238fa618 100644 --- a/pacquet/crates/directory-fetcher/src/walker/tests.rs +++ b/pacquet/crates/directory-fetcher/src/walker/tests.rs @@ -81,6 +81,34 @@ fn walk_all_files_on_empty_directory_returns_empty_map() { assert!(out.is_empty()); } +#[cfg(unix)] +#[test] +fn walk_all_files_terminates_on_symlink_cycle() { + use std::os::unix::fs::symlink; + + // A `loop -> .` symlink would, without the cycle guard, sink the + // walker into infinite recursion until either ENAMETOOLONG fires + // or the Rust stack runs out. The visited-set guard keys off + // `fs::canonicalize`, so `root/loop` canonicalises to `root` and + // the second recursion bails before reading any further. The + // direct children of `root` (incl. `real.txt`) still land in the + // output; nothing under `loop/` does, because the recursive call + // returns immediately on the cycle. + let dir = tempdir().unwrap(); + let root = dir.path(); + touch(root, "real.txt"); + symlink(root, root.join("loop")).unwrap(); + + let out = walk_all_files(root, false).unwrap(); + let rels: BTreeMap<_, _> = collect_rels(root, out); + + assert!(rels.contains_key("real.txt"), "direct children must still be walked: {rels:?}"); + assert!( + rels.keys().all(|k| !k.starts_with("loop/")), + "cycle guard must short-circuit before any `loop/` descendant is recorded: {rels:?}", + ); +} + #[cfg(unix)] #[test] fn walk_all_files_skips_broken_symlink_without_resolve_symlinks() { diff --git a/pacquet/crates/package-manager/src/create_virtual_store.rs b/pacquet/crates/package-manager/src/create_virtual_store.rs index b7590c6437..f11c83b3f8 100644 --- a/pacquet/crates/package-manager/src/create_virtual_store.rs +++ b/pacquet/crates/package-manager/src/create_virtual_store.rs @@ -368,12 +368,31 @@ impl<'a> CreateVirtualStore<'a> { let Some(current_snapshot) = current_snapshots.get(*snapshot_key) else { return true; }; + let wanted_metadata = packages.get(&snapshot_key.without_peer()); + // Directory-typed snapshots carry mutable local + // source: the user can edit `file:./local-pkg` files + // between installs and pacquet must re-walk them on + // every install, otherwise the slot drifts. Mirrors + // upstream's `!isDirectoryDep` clause in `depIsPresent` + // at + // + // — pnpm forces directory snapshots through the cold + // path for the same reason. Without this carve-out + // both `current` and `wanted` resolutions report + // `integrity() == None`, `integrity_equal` returns + // true, the slot directory check passes, and the + // directory-fetcher never runs on the second install. + if matches!( + wanted_metadata.map(|m| &m.resolution), + Some(LockfileResolution::Directory(_)) + ) { + return true; + } if !snapshot_deps_equal(current_snapshot, snapshot) { return true; } let current_metadata = current_packages.and_then(|p| p.get(&snapshot_key.without_peer())); - let wanted_metadata = packages.get(&snapshot_key.without_peer()); if !integrity_equal(current_metadata, wanted_metadata) { return true; } @@ -1028,6 +1047,11 @@ fn integrity_equal(current: Option<&PackageMetadata>, wanted: Option<&PackageMet /// - `GitFetch` — `git` CLI clone / checkout / preparePackage / /// packlist / CAS import. Equivalent to upstream's git-fetcher /// inside the same `fetchPackage` dispatch. +/// - `DirectoryFetch` — local-directory walk / manifest read / +/// packlist for injected workspace deps. Equivalent to upstream's +/// directory-fetcher inside the same `fetchPackage` dispatch; pnpm +/// swallows the throw for optional snapshots uniformly with the +/// tarball / git paths. /// /// Excluded (propagate even for optional snapshots, matching /// upstream's post-`fetching()` `linkPkg` path that sits outside @@ -1042,7 +1066,8 @@ fn is_fetch_side_failure(err: &InstallPackageBySnapshotError) -> bool { matches!( err, InstallPackageBySnapshotError::DownloadTarball(_) - | InstallPackageBySnapshotError::GitFetch(_), + | InstallPackageBySnapshotError::GitFetch(_) + | InstallPackageBySnapshotError::DirectoryFetch(_), ) }