diff --git a/.changeset/perf-importer-skip-staging.md b/.changeset/perf-importer-skip-staging.md new file mode 100644 index 0000000000..389be5fa01 --- /dev/null +++ b/.changeset/perf-importer-skip-staging.md @@ -0,0 +1,10 @@ +--- +"@pnpm/fs.indexed-pkg-importer": patch +"@pnpm/installing.package-requester": patch +"@pnpm/worker": patch +"pnpm": patch +--- + +Skip the staging directory when importing packages into `node_modules`. This avoids the overhead of creating a temp dir and renaming per package. Falls back to the atomic staging path on error. + +Packages that lack a `package.json` now get a synthetic empty one added to the store so that `package.json` can serve as a universal completion marker for the importer. diff --git a/fs/indexed-pkg-importer/src/importIndexedDir.ts b/fs/indexed-pkg-importer/src/importIndexedDir.ts index 189aedd708..757423892c 100644 --- a/fs/indexed-pkg-importer/src/importIndexedDir.ts +++ b/fs/indexed-pkg-importer/src/importIndexedDir.ts @@ -15,8 +15,17 @@ const filenameConflictsLogger = logger('_filename-conflicts') export type ImportFile = (src: string, dest: string) => void +export interface Importer { + importFile: ImportFile + // Used for writing package.json, which is the completion marker and must + // be written atomically. For hard links and reflinks importFile is already + // atomic so callers pass the same function. The copy path passes a + // temp-file + rename wrapper instead. + importFileAtomic: ImportFile +} + export function importIndexedDir ( - importFile: ImportFile, + importer: Importer, newDir: string, filenames: Map, opts: { @@ -24,9 +33,40 @@ export function importIndexedDir ( safeToSkip?: boolean } ): void { + // Fast path: import directly without staging. Callers already verified + // the target package is missing (pkgExistsAtTargetDir / pkgLinkedToStore), + // so we can write straight into newDir and skip the temp dir + rename. 
+ // On any error, clean up and fall through to the staging path which has + // full error handling (EEXIST dedup, ENOENT sanitized-filename retry, etc.). + // keepModulesDir needs the staging path to preserve the existing node_modules. + if (!opts.keepModulesDir) try { + // For safeToSkip (content-addressed GVS), use non-destructive mkdirSync + // so concurrent importers don't wipe each other's files. + if (opts.safeToSkip) { + fs.mkdirSync(newDir, { recursive: true }) + } else { + makeEmptyDirSync(newDir, { recursive: true }) + } + tryImportIndexedDir(importer, newDir, filenames) + return + } catch (err) { + if (util.types.isNativeError(err) && 'code' in err && err.code === 'EEXIST') { + // A concurrent importer may have completed the directory. + // If all files match, there's nothing left to do. + if (allFilesMatch(newDir, filenames)) return + } else { + try { + rimrafSync(newDir) + } catch {} // eslint-disable-line no-empty + } + } + // Staging path: create in temp dir, then atomically rename. + // The dir rename is itself atomic, so individual file atomicity is not + // needed here — use importFile for everything. const stage = pathTemp(newDir) try { - tryImportIndexedDir(importFile, stage, filenames) + makeEmptyDirSync(stage, { recursive: true }) + tryImportIndexedDir({ importFile: importer.importFile, importFileAtomic: importer.importFile }, stage, filenames) if (opts.keepModulesDir) { // Keeping node_modules is needed only when the hoisted node linker is used. moveOrMergeModulesDirs(path.join(newDir, 'node_modules'), path.join(stage, 'node_modules')) @@ -48,18 +88,12 @@ 'which is an issue on case-insensitive filesystems. 
' + `The conflicting file names are: ${JSON.stringify(Object.fromEntries(conflictingFileNames))}` ) - importIndexedDir(importFile, newDir, uniqueFileMap, opts) + importIndexedDir(importer, newDir, uniqueFileMap, opts) return } if (util.types.isNativeError(err) && 'code' in err && err.code === 'ENOENT') { - const { sanitizedFilenames, invalidFilenames } = sanitizeFilenames(filenames) - if (invalidFilenames.length === 0) throw err - globalWarn(`\ -The package linked to "${path.relative(process.cwd(), newDir)}" had \ -files with invalid names: ${invalidFilenames.join(', ')}. \ -They were renamed.`) - importIndexedDir(importFile, newDir, sanitizedFilenames, opts) - return + if (retryWithSanitizedFilenames(importer, newDir, filenames, opts)) return + throw err } throw err } @@ -118,6 +152,22 @@ function allFilesMatch (dir: string, filenames: Map<string, string>): boolean { return true } +function retryWithSanitizedFilenames ( + importer: Importer, + newDir: string, + filenames: Map<string, string>, + opts: { keepModulesDir?: boolean, safeToSkip?: boolean } +): boolean { + const { sanitizedFilenames, invalidFilenames } = sanitizeFilenames(filenames) + if (invalidFilenames.length === 0) return false + globalWarn(`\ +The package linked to "${path.relative(process.cwd(), newDir)}" had \ +files with invalid names: ${invalidFilenames.join(', ')}. 
\ +They were renamed.`) + importIndexedDir(importer, newDir, sanitizedFilenames, opts) + return true +} + interface SanitizeFilenamesResult { sanitizedFilenames: Map<string, string> invalidFilenames: string[] } @@ -136,8 +186,11 @@ function sanitizeFilenames (filenames: Map<string, string>): SanitizeFilenamesRe return { sanitizedFilenames, invalidFilenames } } -function tryImportIndexedDir (importFile: ImportFile, newDir: string, filenames: Map<string, string>): void { - makeEmptyDirSync(newDir, { recursive: true }) +function tryImportIndexedDir ( + { importFile, importFileAtomic }: Importer, + newDir: string, + filenames: Map<string, string> +): void { const allDirs = new Set<string>() for (const f of filenames.keys()) { const dir = path.dirname(f) @@ -147,9 +200,20 @@ function tryImportIndexedDir (importFile: ImportFile, newDir: string, filenames: Array.from(allDirs) .sort((d1, d2) => d1.length - d2.length) // from shortest to longest .forEach((dir) => fs.mkdirSync(path.join(newDir, dir), { recursive: true })) + // Write package.json last so it acts as a completion marker. + // pkgExistsAtTargetDir() checks for package.json to decide if a package + // is already imported — writing it last ensures a crash mid-import won't + // leave a partially-populated directory that appears fully imported. 
+ let packageJsonSrc: string | undefined for (const [f, src] of filenames) { - const dest = path.join(newDir, f) - importFile(src, dest) + if (f === 'package.json') { + packageJsonSrc = src + continue + } + importFile(src, path.join(newDir, f)) + } + if (packageJsonSrc !== undefined) { + importFileAtomic(packageJsonSrc, path.join(newDir, 'package.json')) } } diff --git a/fs/indexed-pkg-importer/src/index.ts b/fs/indexed-pkg-importer/src/index.ts index f918820d59..c91d9d0800 100644 --- a/fs/indexed-pkg-importer/src/index.ts +++ b/fs/indexed-pkg-importer/src/index.ts @@ -7,6 +7,8 @@ import { packageImportMethodLogger } from '@pnpm/core-loggers' import fs from '@pnpm/fs.graceful-fs' import { globalInfo, globalWarn } from '@pnpm/logger' import type { FilesMap, ImportIndexedPackage, ImportOptions } from '@pnpm/store.controller-types' +import { fastPathTemp as pathTemp } from 'path-temp' +import { renameOverwriteSync } from 'rename-overwrite' import { type ImportFile, importIndexedDir } from './importIndexedDir.js' @@ -122,7 +124,7 @@ function clonePkg ( opts: ImportOptions ): 'clone' | undefined { if (opts.resolvedFrom !== 'store' || opts.force || !pkgExistsAtTargetDir(to, opts.filesMap)) { - importIndexedDir(clone, to, opts.filesMap, opts) + importIndexedDir({ importFile: clone, importFileAtomic: clone }, to, opts.filesMap, opts) return 'clone' } return undefined @@ -133,9 +135,9 @@ function pkgExistsAtTargetDir (targetDir: string, filesMap: FilesMap): boolean { } function pickFileFromFilesMap (filesMap: FilesMap): string { - // A package might not have a package.json file. - // For instance, the Node.js package. - // Or injected packages in a Bit workspace. + // New packages always have a package.json (the worker synthesizes one if + // the tarball/directory lacks it). The fallback handles old store entries + // that were indexed before the synthetic package.json was introduced. 
if (filesMap.has('package.json')) { return 'package.json' } @@ -177,7 +179,7 @@ function hardlinkPkg ( opts: ImportOptions ): 'hardlink' | undefined { if (opts.force || shouldRelinkPkg(to, opts)) { - importIndexedDir(importFile, to, opts.filesMap, opts) + importIndexedDir({ importFile, importFileAtomic: importFile }, to, opts.filesMap, opts) return 'hardlink' } return undefined @@ -232,8 +234,24 @@ export function copyPkg ( opts: ImportOptions ): 'copy' | undefined { if (opts.resolvedFrom !== 'store' || opts.force || !pkgExistsAtTargetDir(to, opts.filesMap)) { - importIndexedDir(fs.copyFileSync, to, opts.filesMap, opts) + // copyFileSync is not atomic on non-COW filesystems: a crash mid-copy + // can leave a partially-written file. package.json is the completion + // marker, so it must be written atomically via temp file + rename. + importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: atomicCopyFileSync }, to, opts.filesMap, opts) return 'copy' } return undefined } + +function atomicCopyFileSync (src: string, dest: string): void { + const tmp = pathTemp(dest) + try { + fs.copyFileSync(src, tmp) + } catch (err) { + try { + fs.unlinkSync(tmp) + } catch {} // eslint-disable-line no-empty + throw err + } + renameOverwriteSync(tmp, dest) +} diff --git a/fs/indexed-pkg-importer/test/createImportPackage.test.ts b/fs/indexed-pkg-importer/test/createImportPackage.test.ts index 553b88ea1f..472fdbd58c 100644 --- a/fs/indexed-pkg-importer/test/createImportPackage.test.ts +++ b/fs/indexed-pkg-importer/test/createImportPackage.test.ts @@ -41,6 +41,11 @@ const { createIndexedPkgImporter } = await import('@pnpm/fs.indexed-pkg-importer') const { globalInfo } = await import('@pnpm/logger') beforeEach(() => { + // Clean up real directories created by the importer (not mocked) so each + // test starts fresh — otherwise the fast path sees leftover dirs and writes + // over them, causing different behavior than a fresh import. 
+ fs.rmSync('project', { recursive: true, force: true }) + fs.rmSync('project2', { recursive: true, force: true }) jest.mocked(gfs.copyFileSync).mockClear() jest.mocked(gfs.linkSync).mockClear() jest.mocked(gfs.mkdirSync).mockClear() @@ -48,6 +53,11 @@ beforeEach(() => { jest.mocked(globalInfo).mockReset() }) +afterAll(() => { + fs.rmSync('project', { recursive: true, force: true }) + fs.rmSync('project2', { recursive: true, force: true }) +}) + testOnLinuxOnly('packageImportMethod=auto: clone files by default', () => { const importPackage = createIndexedPkgImporter('auto') expect(importPackage('project/package', { @@ -60,12 +70,12 @@ testOnLinuxOnly('packageImportMethod=auto: clone files by default', () => { })).toBe('clone') expect(gfs.copyFileSync).toHaveBeenCalledWith( path.join('hash1'), - path.join('project', 'package_tmp', 'package.json'), + path.join('project', 'package', 'package.json'), fs.constants.COPYFILE_FICLONE_FORCE ) expect(gfs.copyFileSync).toHaveBeenCalledWith( path.join('hash2'), - path.join('project', 'package_tmp', 'index.js'), + path.join('project', 'package', 'index.js'), fs.constants.COPYFILE_FICLONE_FORCE ) }) @@ -83,8 +93,8 @@ testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails', () => { force: false, resolvedFrom: 'remote', })).toBe('hardlink') - expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package_tmp', 'package.json')) - expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js')) + expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package', 'package.json')) + expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js')) expect(gfs.copyFileSync).toHaveBeenCalled() jest.mocked(gfs.copyFileSync).mockClear() @@ -98,8 +108,8 @@ testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails', () => { resolvedFrom: 'remote', })).toBe('hardlink') 
expect(gfs.copyFileSync).not.toHaveBeenCalled() - expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project2', 'package_tmp', 'package.json')) - expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project2', 'package_tmp', 'index.js')) + expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project2', 'package', 'package.json')) + expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project2', 'package', 'index.js')) }) testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails and even hard linking fails but not with EXDEV error', () => { @@ -121,9 +131,11 @@ testOnLinuxOnly('packageImportMethod=auto: link files if cloning fails and even force: false, resolvedFrom: 'remote', })).toBe('hardlink') - expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js')) + expect(gfs.linkSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js')) expect(gfs.linkSync).toHaveBeenCalledTimes(2) - expect(gfs.copyFileSync).toHaveBeenCalledTimes(1) + // copyFileSync is called twice: the clone attempt fails in both the fast + // path and the staging fallback before initialAuto moves on to hardlink. + expect(gfs.copyFileSync).toHaveBeenCalledTimes(2) }) testOnLinuxOnly('packageImportMethod=auto: chooses copying if cloning and hard linking is not possible', () => { @@ -143,8 +155,9 @@ testOnLinuxOnly('packageImportMethod=auto: chooses copying if cloning and hard l force: false, resolvedFrom: 'remote', })).toBe('copy') - expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js')) - expect(gfs.copyFileSync).toHaveBeenCalledTimes(2) + expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js')) + // 3 calls: clone fails twice (fast path + staging fallback), then copy succeeds. 
+ expect(gfs.copyFileSync).toHaveBeenCalledTimes(3) }) testOnLinuxOnly('packageImportMethod=hardlink: fall back to copying if hardlinking fails', () => { @@ -166,8 +179,8 @@ testOnLinuxOnly('packageImportMethod=hardlink: fall back to copying if hardlinki })).toBe('hardlink') expect(gfs.linkSync).toHaveBeenCalledTimes(3) expect(gfs.copyFileSync).toHaveBeenCalledTimes(2) // One time the target already exists, so it won't be copied - expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package_tmp', 'package.json')) - expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package_tmp', 'index.js')) + expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash1'), path.join('project', 'package', 'package.json')) + expect(gfs.copyFileSync).toHaveBeenCalledWith(path.join('hash2'), path.join('project', 'package', 'index.js')) }) test('packageImportMethod=hardlink does not relink package from store if package.json is linked from the store', () => { diff --git a/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts b/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts index 7f527cd8f3..68c609e126 100644 --- a/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts +++ b/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts @@ -34,7 +34,7 @@ test('safeToSkip skips when target already exists (content-addressed)', () => { const filenames = new Map([['package.json', srcFile]]) // Should not throw — target exists, path is content-addressed so content is correct - importIndexedDir(fs.copyFileSync, newDir, filenames, { safeToSkip: true }) + importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: fs.copyFileSync }, newDir, filenames, { safeToSkip: true }) expect(fs.existsSync(path.join(newDir, 'package.json'))).toBe(true) // When safeToSkip is true and the target already exists with matching content, @@ -54,7 +54,7 @@ test('safeToSkip creates dir when target does not exist', () => { 
const filenames = new Map([['index.js', srcFile]]) // Target doesn't exist — should create it - importIndexedDir(fs.copyFileSync, newDir, filenames, { safeToSkip: true }) + importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: fs.copyFileSync }, newDir, filenames, { safeToSkip: true }) expect(fs.existsSync(path.join(newDir, 'index.js'))).toBe(true) }) diff --git a/fs/indexed-pkg-importer/test/importIndexedDir.test.ts b/fs/indexed-pkg-importer/test/importIndexedDir.test.ts index a644c309cc..4ab7dd99db 100644 --- a/fs/indexed-pkg-importer/test/importIndexedDir.test.ts +++ b/fs/indexed-pkg-importer/test/importIndexedDir.test.ts @@ -17,7 +17,7 @@ test('importIndexedDir() keepModulesDir merges node_modules', async () => { const filenames = new Map([ ['node_modules/a/index.js', path.join(tmp, 'src/node_modules/a/index.js')], ]) - importIndexedDir(fs.linkSync, newDir, filenames, { keepModulesDir: true }) + importIndexedDir({ importFile: fs.linkSync, importFileAtomic: fs.linkSync }, newDir, filenames, { keepModulesDir: true }) expect(fs.readdirSync(path.join(newDir, 'node_modules')).sort()).toEqual(['a', 'b']) }) diff --git a/installing/package-requester/src/packageRequester.ts b/installing/package-requester/src/packageRequester.ts index 6cbd3bd48b..ff0f569237 100644 --- a/installing/package-requester/src/packageRequester.ts +++ b/installing/package-requester/src/packageRequester.ts @@ -281,7 +281,12 @@ async function resolveAndFetch ( if (fetchedResult.bundledManifest) { manifest = fetchedResult.bundledManifest as DependencyManifest } else if (fetchedResult.files.filesMap.has('package.json')) { - manifest = await loadJsonFile<DependencyManifest>(fetchedResult.files.filesMap.get('package.json')!) + const loadedManifest = await loadJsonFile<Record<string, unknown>>(fetchedResult.files.filesMap.get('package.json')!) + // Skip placeholder package.json added as a completion marker by the worker + // for packages that genuinely lack one. 
+ if (!loadedManifest._pnpmPlaceholder) { + manifest = loadedManifest as unknown as DependencyManifest + } } // Add integrity to resolution if it was computed during fetching (only for TarballResolution) if (fetchedResult.integrity && !resolution.type && !(resolution as TarballResolution).integrity) { diff --git a/worker/src/start.ts b/worker/src/start.ts index 6db9c70e27..cd93a972d9 100644 --- a/worker/src/start.ts +++ b/worker/src/start.ts @@ -203,6 +203,8 @@ function addTarballToStore ({ buffer, storeDir, integrity, filesIndexFile, appen if (appendManifest && manifest == null) { manifest = appendManifest addManifestToCafs(cafs, filesIndex, appendManifest) + } else if (!filesIndex.has('package.json')) { + addPlaceholderPackageJsonToCafs(cafs, filesIndex) } const { filesIntegrity, filesMap } = processFilesIndex(filesIndex) const bundledManifest = manifest != null ? normalizeBundledManifest(manifest) : undefined @@ -302,6 +304,8 @@ function addFilesFromDir ( if (appendManifest && manifest == null) { manifest = appendManifest addManifestToCafs(cafs, filesIndex, appendManifest) + } else if (!filesIndex.has('package.json')) { + addPlaceholderPackageJsonToCafs(cafs, filesIndex) } const { filesIntegrity, filesMap } = processFilesIndex(filesIndex) const bundledManifest = manifest != null ? normalizeBundledManifest(manifest) : undefined @@ -360,6 +364,22 @@ function addManifestToCafs (cafs: CafsFunctions, filesIndex: FilesIndex, manifes }) } +const PLACEHOLDER_PACKAGE_JSON = Buffer.from(JSON.stringify({ _pnpmPlaceholder: 'This file was generated by pnpm. The original package did not contain a package.json.' }), 'utf8') + +// Packages that lack a package.json (e.g. injected packages in a Bit +// workspace) get a synthetic one so that package.json can serve as a +// universal completion marker for the indexed package importer. +// The _pnpmPlaceholder field tells the package requester to ignore it +// when reading the manifest. 
+function addPlaceholderPackageJsonToCafs (cafs: CafsFunctions, filesIndex: FilesIndex): void { + const mode = 0o644 + filesIndex.set('package.json', { + mode, + size: PLACEHOLDER_PACKAGE_JSON.length, + ...cafs.addFile(PLACEHOLDER_PACKAGE_JSON, mode), + }) +} + function calculateDiff (baseFiles: PackageFiles, sideEffectsFiles: PackageFiles): SideEffectsDiff { const deleted: string[] = [] const added: PackageFiles = new Map()