fix(pacquet/directory-fetcher,pacquet/package-manager): address CodeRabbit review on #11678

Three parity / hardening fixes flagged in the CodeRabbit review on PR
#11678:

1. **Carve out directory snapshots from the current-lockfile-skip
   gate** (`create_virtual_store.rs`). Directory-typed snapshots have
   `integrity() == None`; without the carve-out `integrity_equal`
   collapsed `None == None == true` and the skip filter dropped the
   snapshot whenever a slot for it existed on disk, so a second
   install never re-walked the (mutable) source dir. Mirrors pnpm's
   `!isDirectoryDep` clause in `depIsPresent` at
   <https://github.com/pnpm/pnpm/blob/94240bc046/deps/graph-builder/src/lockfileToDepGraph.ts#L226-L228>.

2. **Add `DirectoryFetch` to `is_fetch_side_failure`**
   (`create_virtual_store.rs`). Upstream's catch at
   [`lockfileToDepGraph.ts:286-298`](https://github.com/pnpm/pnpm/blob/94240bc046/deps/graph-builder/src/lockfileToDepGraph.ts#L286-L298)
   wraps the whole `fetchPackage` dispatch, so directory-fetcher
   errors on optional snapshots are swallowed uniformly with tarball
   / git fetch errors. Without this, an optional injected-directory
   dep whose source was missing would hard-fail the install instead
   of being dropped.

3. **Symlink-cycle guard in `walk_all_inner`** (`directory-fetcher/
   src/walker.rs`). A `loop -> .` (or any ancestor-pointing) symlink
   previously sank the walker into infinite recursion until either
   ENAMETOOLONG or stack overflow fired. Skip-on-revisit keyed off
   `fs::canonicalize`, matching the pattern
   `pacquet_git_fetcher::packlist` already uses for
   `bundleDependencies` cycles. Pnpm's directory-fetcher has the same
   vulnerability; the guard is a defensible divergence because the
   positive-case behavior is identical to pnpm and the cycle case
   degrades from "crash" to "skip with a `tracing::warn`".

Added a regression test
(`walk_all_files_terminates_on_symlink_cycle`) that points a
`loop -> root` symlink at the walk root and asserts the cycle guard
short-circuits before any `loop/` descendant is recorded.

---
Written by an agent (Claude Code, claude-opus-4-7).
This commit is contained in:
Zoltan Kochan
2026-05-16 11:35:48 +02:00
parent 31ca1fdfaf
commit f2db87c0e3
3 changed files with 82 additions and 5 deletions

View File

@@ -16,7 +16,7 @@
use crate::error::DirectoryFetcherError;
use pacquet_package_manifest::safe_read_package_json_from_dir;
use std::{
collections::HashMap,
collections::{HashMap, HashSet},
fs::{self, Metadata},
io,
path::{Path, PathBuf},
@@ -39,7 +39,8 @@ pub(crate) fn walk_all_files(
resolve_symlinks: bool,
) -> Result<FilesMap, DirectoryFetcherError> {
let mut out = FilesMap::new();
walk_all_inner(dir, "", resolve_symlinks, &mut out)?;
let mut visited = HashSet::new();
walk_all_inner(dir, "", resolve_symlinks, &mut visited, &mut out)?;
Ok(out)
}
@@ -47,8 +48,31 @@ fn walk_all_inner(
dir: &Path,
rel_prefix: &str,
resolve_symlinks: bool,
visited: &mut HashSet<PathBuf>,
out: &mut FilesMap,
) -> Result<(), DirectoryFetcherError> {
// Symlink-cycle guard. Pnpm's directory-fetcher recurses without
// a visited-set so a `foo -> .` (or any ancestor-pointing
// symlink) sinks the whole walk into infinite recursion until the
// path exceeds OS limits and `read_dir` finally errors with
// ENAMETOOLONG. Stack overflow is also reachable on platforms
// where the path-too-long error has a higher ceiling than the
// default Rust stack. Skip-on-revisit instead, matching the
// pattern `pacquet_git_fetcher::packlist` already uses for
// `bundleDependencies` cycles. The check is keyed off
// `fs::canonicalize` so an unresolved symlink and its target
// share one entry; canonicalisation failure (permission denied,
// for example) falls back to the raw path so the guard still
// catches identity loops.
let canonical = fs::canonicalize(dir).unwrap_or_else(|_| dir.to_path_buf());
if !visited.insert(canonical) {
tracing::warn!(
target: "pacquet::directory_fetcher",
dir = %dir.display(),
"symlink cycle: directory already visited at this canonical path; skipping",
);
return Ok(());
}
let entries = fs::read_dir(dir)
.map_err(|source| DirectoryFetcherError::Io { dir: dir.display().to_string(), source })?;
for entry in entries {
@@ -78,7 +102,7 @@ fn walk_all_inner(
format!("{rel_prefix}/{file_name_str}")
};
if resolved.metadata.is_dir() {
walk_all_inner(&resolved.path, &rel, resolve_symlinks, out)?;
walk_all_inner(&resolved.path, &rel, resolve_symlinks, visited, out)?;
} else {
out.insert(rel, resolved.path);
}

View File

@@ -81,6 +81,34 @@ fn walk_all_files_on_empty_directory_returns_empty_map() {
assert!(out.is_empty());
}
#[cfg(unix)]
#[test]
fn walk_all_files_terminates_on_symlink_cycle() {
use std::os::unix::fs::symlink;
// A `loop -> .` symlink would, without the cycle guard, sink the
// walker into infinite recursion until either ENAMETOOLONG fires
// or the Rust stack runs out. The visited-set guard keys off
// `fs::canonicalize`, so `root/loop` canonicalises to `root` and
// the second recursion bails before reading any further. The
// direct children of `root` (incl. `real.txt`) still land in the
// output; nothing under `loop/` does, because the recursive call
// returns immediately on the cycle.
let dir = tempdir().unwrap();
let root = dir.path();
touch(root, "real.txt");
symlink(root, root.join("loop")).unwrap();
let out = walk_all_files(root, false).unwrap();
let rels: BTreeMap<_, _> = collect_rels(root, out);
assert!(rels.contains_key("real.txt"), "direct children must still be walked: {rels:?}");
assert!(
rels.keys().all(|k| !k.starts_with("loop/")),
"cycle guard must short-circuit before any `loop/` descendant is recorded: {rels:?}",
);
}
#[cfg(unix)]
#[test]
fn walk_all_files_skips_broken_symlink_without_resolve_symlinks() {

View File

@@ -368,12 +368,31 @@ impl<'a> CreateVirtualStore<'a> {
let Some(current_snapshot) = current_snapshots.get(*snapshot_key) else {
return true;
};
let wanted_metadata = packages.get(&snapshot_key.without_peer());
// Directory-typed snapshots carry mutable local
// source: the user can edit `file:./local-pkg` files
// between installs and pacquet must re-walk them on
// every install, otherwise the slot drifts. Mirrors
// upstream's `!isDirectoryDep` clause in `depIsPresent`
// at
// <https://github.com/pnpm/pnpm/blob/94240bc046/deps/graph-builder/src/lockfileToDepGraph.ts#L226-L228>
// — pnpm forces directory snapshots through the cold
// path for the same reason. Without this carve-out
// both `current` and `wanted` resolutions report
// `integrity() == None`, `integrity_equal` returns
// true, the slot directory check passes, and the
// directory-fetcher never runs on the second install.
if matches!(
wanted_metadata.map(|m| &m.resolution),
Some(LockfileResolution::Directory(_))
) {
return true;
}
if !snapshot_deps_equal(current_snapshot, snapshot) {
return true;
}
let current_metadata =
current_packages.and_then(|p| p.get(&snapshot_key.without_peer()));
let wanted_metadata = packages.get(&snapshot_key.without_peer());
if !integrity_equal(current_metadata, wanted_metadata) {
return true;
}
@@ -1028,6 +1047,11 @@ fn integrity_equal(current: Option<&PackageMetadata>, wanted: Option<&PackageMet
/// - `GitFetch` — `git` CLI clone / checkout / preparePackage /
/// packlist / CAS import. Equivalent to upstream's git-fetcher
/// inside the same `fetchPackage` dispatch.
/// - `DirectoryFetch` — local-directory walk / manifest read /
/// packlist for injected workspace deps. Equivalent to upstream's
/// directory-fetcher inside the same `fetchPackage` dispatch; pnpm
/// swallows the throw for optional snapshots uniformly with the
/// tarball / git paths.
///
/// Excluded (propagate even for optional snapshots, matching
/// upstream's post-`fetching()` `linkPkg` path that sits outside
@@ -1042,7 +1066,8 @@ fn is_fetch_side_failure(err: &InstallPackageBySnapshotError) -> bool {
matches!(
err,
InstallPackageBySnapshotError::DownloadTarball(_)
| InstallPackageBySnapshotError::GitFetch(_),
| InstallPackageBySnapshotError::GitFetch(_)
| InstallPackageBySnapshotError::DirectoryFetch(_),
)
}