perf(importer): skip staging directory and write package.json as completion marker (#11088)

## Problem

The indexed package importer always creates a staging temp directory, imports files there, then renames to the final location. For cold installs where the target doesn't exist (the common case), the staging + rename is unnecessary overhead.

## Solution

- **Fast path**: callers already verify the target package is missing before calling `importIndexedDir`, so we can write directly into the final directory and skip the temp dir + rename. Falls back to the atomic staging path on EEXIST (concurrent import race) or when `keepModulesDir` is set (hoisted linker needs to merge existing `node_modules`).

- **Completion marker**: `package.json` is written last by `tryImportIndexedDir`, so `pkgExistsAtTargetDir()` (which checks for `package.json`) won't consider a partially-imported directory as complete after a crash.

- **Atomic copy**: the copy import path (non-COW filesystems) uses a temp file + `renameOverwriteSync` for the `package.json` write, since `copyFileSync` is not atomic. Hard links and reflinks are inherently atomic. This is expressed via the `Importer` interface (`importFile` + `importFileAtomic`), passed as the first argument to `importIndexedDir`.

- **Synthetic package.json**: packages that lack a `package.json` (e.g. injected Bit workspace packages) now get a synthetic empty `{}` added to the store, so the completion marker works universally.

- **DRY**: extracted `retryWithSanitizedFilenames()` to deduplicate the ENOENT handler used by both the fast path and staging path.
This commit is contained in:
Victor Sumner
2026-03-25 18:16:08 -04:00
committed by GitHub
parent 5a3dc4ab2f
commit ee9fe5853e
8 changed files with 167 additions and 37 deletions

View File

@@ -0,0 +1,10 @@
---
"@pnpm/fs.indexed-pkg-importer": patch
"@pnpm/installing.package-requester": patch
"@pnpm/worker": patch
"pnpm": patch
---
Skip the staging directory when importing packages into `node_modules`. This avoids the overhead of creating a temp dir and renaming per package. Falls back to the atomic staging path on error.
Packages that lack a `package.json` now get a synthetic empty one added to the store so that `package.json` can serve as a universal completion marker for the importer.

View File

@@ -15,8 +15,17 @@ const filenameConflictsLogger = logger('_filename-conflicts')
export type ImportFile = (src: string, dest: string) => void
export interface Importer {
importFile: ImportFile
// Used for writing package.json, which is the completion marker and must
// be written atomically. For hard links and reflinks importFile is already
// atomic so callers pass the same function. The copy path passes a
// temp-file + rename wrapper instead.
importFileAtomic: ImportFile
}
export function importIndexedDir (
importFile: ImportFile,
importer: Importer,
newDir: string,
filenames: Map<string, string>,
opts: {
@@ -24,9 +33,40 @@ export function importIndexedDir (
safeToSkip?: boolean
}
): void {
// Fast path: import directly without staging. Callers already verified
// the target package is missing (pkgExistsAtTargetDir / pkgLinkedToStore),
// so we can write straight into newDir and skip the temp dir + rename.
// On any error, clean up and fall through to the staging path which has
// full error handling (EEXIST dedup, ENOENT sanitized-filename retry, etc.).
// keepModulesDir needs the staging path to preserve the existing node_modules.
if (!opts.keepModulesDir) try {
// For safeToSkip (content-addressed GVS), use non-destructive mkdirSync
// so concurrent importers don't wipe each other's files.
if (opts.safeToSkip) {
fs.mkdirSync(newDir, { recursive: true })
} else {
makeEmptyDirSync(newDir, { recursive: true })
}
tryImportIndexedDir(importer, newDir, filenames)
return
} catch (err) {
if (util.types.isNativeError(err) && 'code' in err && err.code === 'EEXIST') {
// A concurrent importer may have completed the directory.
// If all files match, there's nothing left to do.
if (allFilesMatch(newDir, filenames)) return
} else {
try {
rimrafSync(newDir)
} catch {} // eslint-disable-line:no-empty
}
}
// Staging path: create in temp dir, then atomically rename.
// The dir rename is itself atomic, so individual file atomicity is not
// needed here — use importFile for everything.
const stage = pathTemp(newDir)
try {
tryImportIndexedDir(importFile, stage, filenames)
makeEmptyDirSync(stage, { recursive: true })
tryImportIndexedDir({ importFile: importer.importFile, importFileAtomic: importer.importFile }, stage, filenames)
if (opts.keepModulesDir) {
// Keeping node_modules is needed only when the hoisted node linker is used.
moveOrMergeModulesDirs(path.join(newDir, 'node_modules'), path.join(stage, 'node_modules'))
@@ -48,18 +88,12 @@ export function importIndexedDir (
'which is an issue on case-insensitive filesystems. ' +
`The conflicting file names are: ${JSON.stringify(Object.fromEntries(conflictingFileNames))}`
)
importIndexedDir(importFile, newDir, uniqueFileMap, opts)
importIndexedDir(importer, newDir, uniqueFileMap, opts)
return
}
if (util.types.isNativeError(err) && 'code' in err && err.code === 'ENOENT') {
const { sanitizedFilenames, invalidFilenames } = sanitizeFilenames(filenames)
if (invalidFilenames.length === 0) throw err
globalWarn(`\
The package linked to "${path.relative(process.cwd(), newDir)}" had \
files with invalid names: ${invalidFilenames.join(', ')}. \
They were renamed.`)
importIndexedDir(importFile, newDir, sanitizedFilenames, opts)
return
if (retryWithSanitizedFilenames(importer, newDir, filenames, opts)) return
throw err
}
throw err
}
@@ -118,6 +152,22 @@ function allFilesMatch (dir: string, filenames: Map<string, string>): boolean {
return true
}
function retryWithSanitizedFilenames (
importer: Importer,
newDir: string,
filenames: Map<string, string>,
opts: { keepModulesDir?: boolean, safeToSkip?: boolean }
): boolean {
const { sanitizedFilenames, invalidFilenames } = sanitizeFilenames(filenames)
if (invalidFilenames.length === 0) return false
globalWarn(`\
The package linked to "${path.relative(process.cwd(), newDir)}" had \
files with invalid names: ${invalidFilenames.join(', ')}. \
They were renamed.`)
importIndexedDir(importer, newDir, sanitizedFilenames, opts)
return true
}
interface SanitizeFilenamesResult {
sanitizedFilenames: Map<string, string>
invalidFilenames: string[]
@@ -136,8 +186,11 @@ function sanitizeFilenames (filenames: Map<string, string>): SanitizeFilenamesRe
return { sanitizedFilenames, invalidFilenames }
}
function tryImportIndexedDir (importFile: ImportFile, newDir: string, filenames: Map<string, string>): void {
makeEmptyDirSync(newDir, { recursive: true })
function tryImportIndexedDir (
{ importFile, importFileAtomic }: Importer,
newDir: string,
filenames: Map<string, string>
): void {
const allDirs = new Set<string>()
for (const f of filenames.keys()) {
const dir = path.dirname(f)
@@ -147,9 +200,20 @@ function tryImportIndexedDir (importFile: ImportFile, newDir: string, filenames:
Array.from(allDirs)
.sort((d1, d2) => d1.length - d2.length) // from shortest to longest
.forEach((dir) => fs.mkdirSync(path.join(newDir, dir), { recursive: true }))
// Write package.json last so it acts as a completion marker.
// pkgExistsAtTargetDir() checks for package.json to decide if a package
// is already imported — writing it last ensures a crash mid-import won't
// leave a partially-populated directory that appears fully imported.
let packageJsonSrc: string | undefined
for (const [f, src] of filenames) {
const dest = path.join(newDir, f)
importFile(src, dest)
if (f === 'package.json') {
packageJsonSrc = src
continue
}
importFile(src, path.join(newDir, f))
}
if (packageJsonSrc !== undefined) {
importFileAtomic(packageJsonSrc, path.join(newDir, 'package.json'))
}
}

View File

@@ -7,6 +7,8 @@ import { packageImportMethodLogger } from '@pnpm/core-loggers'
import fs from '@pnpm/fs.graceful-fs'
import { globalInfo, globalWarn } from '@pnpm/logger'
import type { FilesMap, ImportIndexedPackage, ImportOptions } from '@pnpm/store.controller-types'
import { fastPathTemp as pathTemp } from 'path-temp'
import { renameOverwriteSync } from 'rename-overwrite'
import { type ImportFile, importIndexedDir } from './importIndexedDir.js'
@@ -122,7 +124,7 @@ function clonePkg (
opts: ImportOptions
): 'clone' | undefined {
if (opts.resolvedFrom !== 'store' || opts.force || !pkgExistsAtTargetDir(to, opts.filesMap)) {
importIndexedDir(clone, to, opts.filesMap, opts)
importIndexedDir({ importFile: clone, importFileAtomic: clone }, to, opts.filesMap, opts)
return 'clone'
}
return undefined
@@ -133,9 +135,9 @@ function pkgExistsAtTargetDir (targetDir: string, filesMap: FilesMap): boolean {
}
function pickFileFromFilesMap (filesMap: FilesMap): string {
// A package might not have a package.json file.
// For instance, the Node.js package.
// Or injected packages in a Bit workspace.
// New packages always have a package.json (the worker synthesizes one if
// the tarball/directory lacks it). The fallback handles old store entries
// that were indexed before the synthetic package.json was introduced.
if (filesMap.has('package.json')) {
return 'package.json'
}
@@ -177,7 +179,7 @@ function hardlinkPkg (
opts: ImportOptions
): 'hardlink' | undefined {
if (opts.force || shouldRelinkPkg(to, opts)) {
importIndexedDir(importFile, to, opts.filesMap, opts)
importIndexedDir({ importFile, importFileAtomic: importFile }, to, opts.filesMap, opts)
return 'hardlink'
}
return undefined
@@ -232,8 +234,24 @@ export function copyPkg (
opts: ImportOptions
): 'copy' | undefined {
if (opts.resolvedFrom !== 'store' || opts.force || !pkgExistsAtTargetDir(to, opts.filesMap)) {
importIndexedDir(fs.copyFileSync, to, opts.filesMap, opts)
// copyFileSync is not atomic on non-COW filesystems: a crash mid-copy
// can leave a partially-written file. package.json is the completion
// marker, so it must be written atomically via temp file + rename.
importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: atomicCopyFileSync }, to, opts.filesMap, opts)
return 'copy'
}
return undefined
}
/**
 * Copy `src` to `dest` atomically: write to a temp file next to `dest`,
 * then rename over it. `fs.copyFileSync` alone can leave a partially
 * written file if the process crashes mid-copy, which would break the
 * package.json completion-marker contract on non-COW filesystems.
 *
 * @throws the original copy error after best-effort removal of the temp file.
 */
function atomicCopyFileSync (src: string, dest: string): void {
  const tmp = pathTemp(dest)
  try {
    fs.copyFileSync(src, tmp)
  } catch (err) {
    // Best-effort cleanup of the partial temp file; the original copy
    // error is the one worth surfacing, so swallow any unlink failure.
    try {
      fs.unlinkSync(tmp)
    } catch {} // eslint-disable-line no-empty
    throw err
  }
  renameOverwriteSync(tmp, dest)
}

View File

@@ -41,6 +41,11 @@ const { createIndexedPkgImporter } = await import('@pnpm/fs.indexed-pkg-importer
const { globalInfo } = await import('@pnpm/logger')
beforeEach(() => {
// Clean up real directories created by the importer (not mocked) so each
// test starts fresh — otherwise the fast path sees leftover dirs and writes
// over them, causing different behavior than a fresh import.
fs.rmSync('project', { recursive: true, force: true })
fs.rmSync('project2', { recursive: true, force: true })
jest.mocked(gfs.copyFileSync).mockClear()
jest.mocked(gfs.linkSync).mockClear()
jest.mocked(gfs.mkdirSync).mockClear()
@@ -48,6 +53,11 @@ beforeEach(() => {
jest.mocked(globalInfo).mockReset()
})
afterAll(() => {
fs.rmSync('project', { recursive: true, force: true })
fs.rmSync('project2', { recursive: true, force: true })
})
testOnLinuxOnly('packageImportMethod=auto: clone files by default', () => {
const importPackage = createIndexedPkgImporter('auto')
expect(importPackage('project/package', {
@@ -60,12 +70,12 @@ testOnLinuxOnly('packageImportMethod=auto: clone files by default', () => {
})).toBe('clone')
expect(gfs.copyFileSync).toHaveBeenCalledWith(
path.join('hash1'),
path.join('project', 'package_tmp', 'package.json'),
path.join('project', 'package', 'package.json'),
fs.constants.COPYFILE_FICLONE_FORCE
)
expect(gfs.copyFileSync).toHaveBeenCalledWith(
path.join('hash2'),
path.join('project', 'package_tmp', 'index.js'),
path.join('project', 'package', 'index.js'),
fs.constants.COPYFILE_FICLONE_FORCE
)
})
@@ -83,8 +93,8 @@ testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails', () => {
force: false,
resolvedFrom: 'remote',
})).toBe('hardlink')
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package_tmp', 'package.json'))
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js'))
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package', 'package.json'))
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js'))
expect(gfs.copyFileSync).toHaveBeenCalled()
jest.mocked(gfs.copyFileSync).mockClear()
@@ -98,8 +108,8 @@ testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails', () => {
resolvedFrom: 'remote',
})).toBe('hardlink')
expect(gfs.copyFileSync).not.toHaveBeenCalled()
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project2', 'package_tmp', 'package.json'))
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project2', 'package_tmp', 'index.js'))
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project2', 'package', 'package.json'))
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project2', 'package', 'index.js'))
})
testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails and even hard linking fails but not with EXDEV error', () => {
@@ -121,9 +131,11 @@ testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails and even
force: false,
resolvedFrom: 'remote',
})).toBe('hardlink')
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js'))
expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js'))
expect(gfs.linkSync).toHaveBeenCalledTimes(2)
expect(gfs.copyFileSync).toHaveBeenCalledTimes(1)
// copyFileSync is called twice: the clone attempt fails in both the fast
// path and the staging fallback before initialAuto moves on to hardlink.
expect(gfs.copyFileSync).toHaveBeenCalledTimes(2)
})
testOnLinuxOnly('packageImportMethod=auto: chooses copying if cloning and hard linking is not possible', () => {
@@ -143,8 +155,9 @@ testOnLinuxOnly('packageImportMethod=auto: chooses copying if cloning and hard l
force: false,
resolvedFrom: 'remote',
})).toBe('copy')
expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js'))
expect(gfs.copyFileSync).toHaveBeenCalledTimes(2)
expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js'))
// 3 calls: clone fails twice (fast path + staging fallback), then copy succeeds.
expect(gfs.copyFileSync).toHaveBeenCalledTimes(3)
})
testOnLinuxOnly('packageImportMethod=hardlink: fall back to copying if hardlinking fails', () => {
@@ -166,8 +179,8 @@ testOnLinuxOnly('packageImportMethod=hardlink: fall back to copying if hardlinki
})).toBe('hardlink')
expect(gfs.linkSync).toHaveBeenCalledTimes(3)
expect(gfs.copyFileSync).toHaveBeenCalledTimes(2) // One time the target already exists, so it won't be copied
expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package_tmp', 'package.json'))
expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js'))
expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package', 'package.json'))
expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js'))
})
test('packageImportMethod=hardlink does not relink package from store if package.json is linked from the store', () => {

View File

@@ -34,7 +34,7 @@ test('safeToSkip skips when target already exists (content-addressed)', () => {
const filenames = new Map([['package.json', srcFile]])
// Should not throw — target exists, path is content-addressed so content is correct
importIndexedDir(fs.copyFileSync, newDir, filenames, { safeToSkip: true })
importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: fs.copyFileSync }, newDir, filenames, { safeToSkip: true })
expect(fs.existsSync(path.join(newDir, 'package.json'))).toBe(true)
// When safeToSkip is true and the target already exists with matching content,
@@ -54,7 +54,7 @@ test('safeToSkip creates dir when target does not exist', () => {
const filenames = new Map([['index.js', srcFile]])
// Target doesn't exist — should create it
importIndexedDir(fs.copyFileSync, newDir, filenames, { safeToSkip: true })
importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: fs.copyFileSync }, newDir, filenames, { safeToSkip: true })
expect(fs.existsSync(path.join(newDir, 'index.js'))).toBe(true)
})

View File

@@ -17,7 +17,7 @@ test('importIndexedDir() keepModulesDir merges node_modules', async () => {
const filenames = new Map([
['node_modules/a/index.js', path.join(tmp, 'src/node_modules/a/index.js')],
])
importIndexedDir(fs.linkSync, newDir, filenames, { keepModulesDir: true })
importIndexedDir({ importFile: fs.linkSync, importFileAtomic: fs.linkSync }, newDir, filenames, { keepModulesDir: true })
expect(fs.readdirSync(path.join(newDir, 'node_modules')).sort()).toEqual(['a', 'b'])
})

View File

@@ -281,7 +281,12 @@ async function resolveAndFetch (
if (fetchedResult.bundledManifest) {
manifest = fetchedResult.bundledManifest as DependencyManifest
} else if (fetchedResult.files.filesMap.has('package.json')) {
manifest = await loadJsonFile<DependencyManifest>(fetchedResult.files.filesMap.get('package.json')!)
const loadedManifest = await loadJsonFile<Record<string, unknown>>(fetchedResult.files.filesMap.get('package.json')!)
// Skip placeholder package.json added as a completion marker by the worker
// for packages that genuinely lack one.
if (!loadedManifest._pnpmPlaceholder) {
manifest = loadedManifest as unknown as DependencyManifest
}
}
// Add integrity to resolution if it was computed during fetching (only for TarballResolution)
if (fetchedResult.integrity && !resolution.type && !(resolution as TarballResolution).integrity) {

View File

@@ -203,6 +203,8 @@ function addTarballToStore ({ buffer, storeDir, integrity, filesIndexFile, appen
if (appendManifest && manifest == null) {
manifest = appendManifest
addManifestToCafs(cafs, filesIndex, appendManifest)
} else if (!filesIndex.has('package.json')) {
addPlaceholderPackageJsonToCafs(cafs, filesIndex)
}
const { filesIntegrity, filesMap } = processFilesIndex(filesIndex)
const bundledManifest = manifest != null ? normalizeBundledManifest(manifest) : undefined
@@ -302,6 +304,8 @@ function addFilesFromDir (
if (appendManifest && manifest == null) {
manifest = appendManifest
addManifestToCafs(cafs, filesIndex, appendManifest)
} else if (!filesIndex.has('package.json')) {
addPlaceholderPackageJsonToCafs(cafs, filesIndex)
}
const { filesIntegrity, filesMap } = processFilesIndex(filesIndex)
const bundledManifest = manifest != null ? normalizeBundledManifest(manifest) : undefined
@@ -360,6 +364,22 @@ function addManifestToCafs (cafs: CafsFunctions, filesIndex: FilesIndex, manifes
})
}
const PLACEHOLDER_PACKAGE_JSON = Buffer.from(JSON.stringify({ _pnpmPlaceholder: 'This file was generated by pnpm. The original package did not contain a package.json.' }), 'utf8')
// A package can legitimately ship without a package.json (for example,
// injected packages in a Bit workspace). The importer relies on
// package.json as its universal completion marker, so such packages get
// a synthetic one added to the store. The _pnpmPlaceholder field lets
// the package requester recognize and skip it when reading the manifest.
function addPlaceholderPackageJsonToCafs (cafs: CafsFunctions, filesIndex: FilesIndex): void {
  const fileMode = 0o644
  const stored = cafs.addFile(PLACEHOLDER_PACKAGE_JSON, fileMode)
  filesIndex.set('package.json', {
    mode: fileMode,
    size: PLACEHOLDER_PACKAGE_JSON.length,
    ...stored,
  })
}
function calculateDiff (baseFiles: PackageFiles, sideEffectsFiles: PackageFiles): SideEffectsDiff {
const deleted: string[] = []
const added: PackageFiles = new Map()