diff --git a/.changeset/fix-bin-linker-node-exe-warning.md b/.changeset/fix-bin-linker-node-exe-warning.md new file mode 100644 index 0000000000..5c2b83bdf6 --- /dev/null +++ b/.changeset/fix-bin-linker-node-exe-warning.md @@ -0,0 +1,6 @@ +--- +"@pnpm/bins.linker": patch +"pnpm": patch +--- + +Skip the redundant "target bin directory already contains an exe called node" warning on Windows when the existing `node.exe` already matches the target (same hard link or identical content) [pnpm/pnpm#12203](https://github.com/pnpm/pnpm/issues/12203). diff --git a/Cargo.lock b/Cargo.lock index d332408c1a..de99c86b0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3548,6 +3548,7 @@ dependencies = [ "pacquet-fs", "pipe-trait", "rayon", + "same-file", "serde_json", "tempfile", "walkdir", diff --git a/Cargo.toml b/Cargo.toml index 1637338b94..5ad02063a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,6 +135,7 @@ pgp = { version = "0.19.0", default-features = false } rayon = { version = "1.12.0" } rmp-serde = { version = "1.3.0" } rusqlite = { version = "0.39.0", features = ["bundled"] } +same-file = { version = "1.0.6" } serde = { version = "1.0.228", features = ["derive"] } serde_json = { version = "1.0.150", features = ["preserve_order", "raw_value"] } serde-saphyr = { version = "0.0.27" } diff --git a/bins/linker/src/index.ts b/bins/linker/src/index.ts index b7f16385f5..201545d1b7 100644 --- a/bins/linker/src/index.ts +++ b/bins/linker/src/index.ts @@ -280,15 +280,23 @@ async function linkBin (cmd: CommandInfo, binsDir: string, opts?: LinkBinOptions } catch {} if (IS_WINDOWS) { const exePath = path.join(binsDir, `${cmd.name}${getExeExtension()}`) - if (existsSync(exePath)) { - globalWarn(`The target bin directory already contains an exe called ${cmd.name}, so removing ${exePath}`) - await rimraf(exePath) - } - // node.exe must exist as a real executable, not a cmd-shim wrapper. + // node.exe is the only bin pnpm links directly as a real executable rather + // than through a cmd-shim, so the existing-exe handling only applies to it. // We could update our own cmd shims to support node.cmd, but we can't // control npm's cmd shims, which break when node resolves to node.cmd. // npm's cmd shims use `IF EXIST "%~dp0\node.exe"` to find the node binary. - if (cmd.name === 'node' && cmd.path.toLowerCase().endsWith('.exe')) { + const isNodeExe = cmd.name === 'node' && cmd.path.toLowerCase().endsWith('.exe') + if (existsSync(exePath)) { + // Skip warning and re-linking when the existing node.exe already matches + // the target, otherwise every command that re-links node would spam the + // warning below on warm installs. + if (isNodeExe && await isSameFile(exePath, cmd.path)) { + return + } + globalWarn(`The target bin directory already contains an exe called ${cmd.name}, so removing ${exePath}`) + await rimraf(exePath) + } + if (isNodeExe) { try { await fs.link(cmd.path, exePath) } catch { @@ -361,6 +369,67 @@ async function linkBin (cmd: CommandInfo, binsDir: string, opts?: LinkBinOptions } } +// Reports whether two paths refer to the same file. A matching inode/device +// pair (read as BigInts to avoid the precision loss of NTFS 64-bit file IDs) +// proves a hard link cheaply. Whenever identity can't be established that way — +// because the inodes genuinely differ or because Windows reports an unreliable +// zero inode — we fall back to comparing the file contents after a quick size +// check, which also treats a byte-identical copy as the same file. +async function isSameFile (pathA: string, pathB: string): Promise { + const [statA, statB] = await Promise.all([ + fs.stat(pathA, { bigint: true }).catch(() => null), + fs.stat(pathB, { bigint: true }).catch(() => null), + ]) + if (statA == null || statB == null) return false + if (statA.ino && statB.ino && statA.ino === statB.ino && statA.dev === statB.dev) { + return true + } + if (statA.size !== statB.size) return false + return haveEqualContents(pathA, pathB) +} + +const FILE_COMPARE_CHUNK_SIZE = 64 * 1024 + +// Compares two equally-sized files chunk by chunk, so an executable is never +// fully buffered in memory and a mismatch returns as early as possible. +async function haveEqualContents (pathA: string, pathB: string): Promise { + const [fhA, fhB] = await Promise.all([ + fs.open(pathA, 'r').catch(() => null), + fs.open(pathB, 'r').catch(() => null), + ]) + if (fhA == null || fhB == null) { + await fhA?.close().catch(() => {}) + await fhB?.close().catch(() => {}) + return false + } + try { + const bufA = Buffer.alloc(FILE_COMPARE_CHUNK_SIZE) + const bufB = Buffer.alloc(FILE_COMPARE_CHUNK_SIZE) + let position = 0 + for (;;) { + // Reading sequentially is intentional: each iteration compares one chunk + // and stops early on a mismatch or EOF. + const [readA, readB] = await Promise.all([ // eslint-disable-line no-await-in-loop + fhA.read(bufA, 0, FILE_COMPARE_CHUNK_SIZE, position), + fhB.read(bufB, 0, FILE_COMPARE_CHUNK_SIZE, position), + ]) + if (readA.bytesRead !== readB.bytesRead) return false + if (readA.bytesRead === 0) return true + if (!bufA.subarray(0, readA.bytesRead).equals(bufB.subarray(0, readB.bytesRead))) { + return false + } + position += readA.bytesRead + } + } catch { + // A transient read error must not abort bin linking: treat the files as + // different so the caller falls back to the warn + remove + relink path. + return false + } finally { + await fhA.close().catch(() => {}) + await fhB.close().catch(() => {}) + } +} + // `fixBin` chmods the bin's source file (which lives inside the store) to make // it executable and rewrites a Windows CRLF shebang to LF. Under the global // virtual store that source is `{storeDir}/links/...`, so on a read-only store diff --git a/bins/linker/test/index.ts b/bins/linker/test/index.ts index 2b66bfe0ff..1ac0844e84 100644 --- a/bins/linker/test/index.ts +++ b/bins/linker/test/index.ts @@ -733,6 +733,70 @@ describe('node binary linking', () => { // No cmd-shim should be created since we return early expect(fs.existsSync(path.join(binTarget, `node${CMD_EXTENSION}`))).toBe(false) }) + + testOnWindows('linkBinsOfPackages() does not warn when node.exe is already the correct hardlink', async () => { + const binTarget = temporaryDirectory() + const nodeDir = temporaryDirectory() + + fs.writeFileSync(path.join(nodeDir, 'node.exe'), 'fake-node-binary', 'utf8') + + const pkgs = [ + { + location: nodeDir, + manifest: { + name: 'node', + version: '20.0.0', + bin: { node: 'node.exe' }, + }, + }, + ] + + // First call creates the hardlink + await linkBinsOfPackages(pkgs, binTarget) + jest.mocked(globalWarn).mockClear() + + // Second call should not warn because the .exe is already the correct hardlink + await linkBinsOfPackages(pkgs, binTarget) + + // The absence of a warning is the regression signal: the warn and the + // `rimraf` that recreates node.exe live in the same branch, so no warning + // means node.exe was left untouched. We don't assert on inode identity + // because node.exe may legitimately be a copy rather than a hardlink (the + // implementation falls back to copyFile when hardlinking fails). + expect(globalWarn).not.toHaveBeenCalled() + const exePath = path.join(binTarget, 'node.exe') + expect(fs.existsSync(exePath)).toBe(true) + expect(fs.readFileSync(exePath, 'utf8')).toBe('fake-node-binary') + }) + + testOnWindows('linkBinsOfPackages() does not warn when node.exe has identical content but is not a hardlink', async () => { + const binTarget = temporaryDirectory() + const nodeDir = temporaryDirectory() + + fs.writeFileSync(path.join(nodeDir, 'node.exe'), 'fake-node-binary', 'utf8') + // Pre-place an independent copy with identical content (different inode), as + // happens on filesystems where Windows reports a zero inode and the previous + // link fell back to a copy. + fs.writeFileSync(path.join(binTarget, 'node.exe'), 'fake-node-binary', 'utf8') + + const pkgs = [ + { + location: nodeDir, + manifest: { + name: 'node', + version: '20.0.0', + bin: { node: 'node.exe' }, + }, + }, + ] + + await linkBinsOfPackages(pkgs, binTarget) + + expect(globalWarn).not.toHaveBeenCalled() + const exePath = path.join(binTarget, 'node.exe') + expect(fs.existsSync(exePath)).toBe(true) + expect(fs.readFileSync(exePath, 'utf8')).toBe('fake-node-binary') + }) }) test('linkBins() resolves conflicts using BIN_OWNER_OVERRIDES (npx owned by npm)', async () => { diff --git a/pacquet/crates/cmd-shim/Cargo.toml b/pacquet/crates/cmd-shim/Cargo.toml index 111ade9f6e..39e1dc3e14 100644 --- a/pacquet/crates/cmd-shim/Cargo.toml +++ b/pacquet/crates/cmd-shim/Cargo.toml @@ -17,6 +17,7 @@ derive_more = { workspace = true } miette = { workspace = true } pipe-trait = { workspace = true } rayon = { workspace = true } +same-file = { workspace = true } serde_json = { workspace = true } walkdir = { workspace = true } diff --git a/pacquet/crates/cmd-shim/src/link_bins.rs b/pacquet/crates/cmd-shim/src/link_bins.rs index b70f792857..7e09cc33d2 100644 --- a/pacquet/crates/cmd-shim/src/link_bins.rs +++ b/pacquet/crates/cmd-shim/src/link_bins.rs @@ -588,6 +588,13 @@ fn link_node_bin(target_path: &Path, shim_path: &Path) -> Result Result bool { + if let (Ok(handle_a), Ok(handle_b)) = + (same_file::Handle::from_path(a), same_file::Handle::from_path(b)) + && handle_a == handle_b + { + return true; + } + match (std::fs::metadata(a), std::fs::metadata(b)) { + (Ok(meta_a), Ok(meta_b)) => meta_a.len() == meta_b.len() && have_equal_contents(a, b), + _ => false, + } +} + +/// Compare two equally-sized files chunk by chunk, so an executable is never +/// fully buffered in memory and a mismatch returns as early as possible. +#[cfg(windows)] +fn have_equal_contents(a: &Path, b: &Path) -> bool { + const CHUNK_SIZE: usize = 64 * 1024; + let (Ok(mut file_a), Ok(mut file_b)) = (std::fs::File::open(a), std::fs::File::open(b)) else { + return false; + }; + let mut buf_a = vec![0u8; CHUNK_SIZE]; + let mut buf_b = vec![0u8; CHUNK_SIZE]; + loop { + let (Ok(read_a), Ok(read_b)) = + (read_chunk(&mut file_a, &mut buf_a), read_chunk(&mut file_b, &mut buf_b)) + else { + return false; + }; + if read_a != read_b { + return false; + } + if read_a == 0 { + return true; + } + if buf_a[..read_a] != buf_b[..read_b] { + return false; + } + } +} + +/// Read up to `buf.len()` bytes, looping over short reads so a full chunk is +/// only short at end of file. Like [`std::io::Read::read_exact`] but tolerant +/// of EOF. +#[cfg(windows)] +fn read_chunk(reader: &mut impl std::io::Read, buf: &mut [u8]) -> io::Result { + let mut filled = 0; + while filled < buf.len() { + match reader.read(&mut buf[filled..]) { + Ok(0) => break, + Ok(n) => filled += n, + Err(error) if error.kind() == io::ErrorKind::Interrupted => {} + Err(error) => return Err(error), + } + } + Ok(filled) +} + /// Remove an existing dirent at `path`, swallowing `NotFound`. Used by /// [`link_node_bin`] to clear any prior shim / symlink / hardlink /// before laying down the new one. Any other IO error (`PermissionDenied`, diff --git a/pacquet/crates/cmd-shim/src/link_bins/tests.rs b/pacquet/crates/cmd-shim/src/link_bins/tests.rs index 12d201ee55..2f7329f085 100644 --- a/pacquet/crates/cmd-shim/src/link_bins/tests.rs +++ b/pacquet/crates/cmd-shim/src/link_bins/tests.rs @@ -1272,6 +1272,55 @@ fn link_node_bin_hardlinks_node_exe_on_windows() { ); } +/// Windows-only: when `node.exe` already has identical content to the +/// source binary (e.g. a prior copy-fallback install), the linker must +/// leave it in place instead of removing and relinking it. Exercises the +/// content-comparison fallback, since the pre-existing copy has a different +/// file identity than the source. Mirrors pnpm's same-file early-return at +/// . +#[cfg(windows)] +#[test] +fn link_node_bin_skips_relink_when_node_exe_already_correct() { + use same_file::Handle; + let tmp = tempdir().unwrap(); + let bin_target = tmp.path().join("bin_target"); + create_dir_all(&bin_target).unwrap(); + let node_dir = tmp.path().join("node_pkg"); + create_dir_all(&node_dir).unwrap(); + write_file(node_dir.join("node.exe"), "fake-node-binary").unwrap(); + // Pre-place an independent copy with identical content (a different file + // identity), as an earlier copy-fallback install would leave behind. + write_file(bin_target.join("node.exe"), "fake-node-binary").unwrap(); + + write_file( + node_dir.join("package.json"), + json!({"name": "node", "version": "20.0.0", "bin": {"node": "node.exe"}}).to_string(), + ) + .unwrap(); + let manifest: Value = + serde_json::from_slice(&read_file(node_dir.join("package.json")).unwrap()).unwrap(); + link_bins_of_packages::( + &[PackageBinSource::new(node_dir.clone(), Arc::new(manifest))], + &bin_target, + ) + .unwrap(); + + let exe = bin_target.join("node.exe"); + assert_eq!(read_to_string(&exe).unwrap(), "fake-node-binary"); + // The pre-existing copy is left in place rather than removed and relinked: + // node.exe must not become a hardlink to the source binary. When file + // identity can't be obtained (the production code tolerates this), treat + // them as distinct rather than panicking on a failed handle lookup. + let relinked_to_source = matches!( + (Handle::from_path(&exe), Handle::from_path(node_dir.join("node.exe"))), + (Ok(exe_handle), Ok(source_handle)) if exe_handle == source_handle, + ); + assert!( + !relinked_to_source, + "node.exe must stay the independent copy, not be relinked to the source", + ); +} + /// Windows-only: when the node manifest declares a non-`.exe` source /// (uncommon but possible — e.g. a wrapper script), pnpm falls through /// to the regular cmd-shim path. Pacquet must too.