diff --git a/.changeset/fix-concurrent-import-partial-pkg.md b/.changeset/fix-concurrent-import-partial-pkg.md new file mode 100644 index 0000000000..4f24e0eef9 --- /dev/null +++ b/.changeset/fix-concurrent-import-partial-pkg.md @@ -0,0 +1,6 @@ +--- +"@pnpm/fs.indexed-pkg-importer": patch +"pnpm": patch +--- + +Fixed packages being materialized into the virtual store without their root-level files (`package.json`, `LICENSE`, README, root entrypoints) when multiple `pnpm install` processes ran against the same store/workspace concurrently. The fast import path used to destructively empty the shared target directory, so a concurrent importer could wipe files another importer had already written; if the surviving files included the `package.json` completion marker, every later install treated the broken directory as complete and never repaired it. The fast path now imports directly only when it can create the target directory exclusively, and otherwise builds the package in a private temp directory and atomically renames it into place [#12197](https://github.com/pnpm/pnpm/issues/12197). diff --git a/fs/indexed-pkg-importer/src/importIndexedDir.ts b/fs/indexed-pkg-importer/src/importIndexedDir.ts index 9cc32df55d..152cb90b4b 100644 --- a/fs/indexed-pkg-importer/src/importIndexedDir.ts +++ b/fs/indexed-pkg-importer/src/importIndexedDir.ts @@ -36,24 +36,27 @@ export function importIndexedDir ( // Fast path: import directly without staging. Callers already verified // the target package is missing (pkgExistsAtTargetDir / pkgLinkedToStore), // so we can write straight into newDir and skip the temp dir + rename. - // On any error, clean up and fall through to the staging path which has - // full error handling (EEXIST dedup, ENOENT sanitized-filename retry, etc.). + // On failure we fall through to the staging path, which has full error + // handling (EEXIST dedup, ENOENT sanitized-filename retry, etc.) and + // atomically swaps in a complete directory. // keepModulesDir needs the staging path to preserve the existing node_modules. - if (!opts.keepModulesDir) try { - // For safeToSkip (content-addressed GVS), use non-destructive mkdirSync - // so concurrent importers don't wipe each other's files. + if (!opts.keepModulesDir) { if (opts.safeToSkip) { - fs.mkdirSync(newDir, { recursive: true }) - } else { - makeEmptyDirSync(newDir, { recursive: true }) - } - tryImportIndexedDir(importer, newDir, filenames) - return - } catch (err) { - if (util.types.isNativeError(err) && 'code' in err && err.code === 'EEXIST') { - // A concurrent importer may have completed the directory. - // If all files match, there's nothing left to do. - if (allFilesMatch(newDir, filenames)) return + // Content-addressed target (e.g. global virtual store): the path is + // shared across projects, so concurrent importers are expected. Use a + // non-destructive mkdir and let importFile dedup via EEXIST. If another + // importer already completed the directory, there's nothing to do. + try { + fs.mkdirSync(newDir, { recursive: true }) + tryImportIndexedDir(importer, newDir, filenames) + return + } catch (err) { + if (util.types.isNativeError(err) && 'code' in err && err.code === 'EEXIST' && allFilesMatch(newDir, filenames)) { + return + } + } + } else if (tryExclusiveImport(importer, newDir, filenames)) { + return } } // Staging path: create in temp dir, then atomically rename. @@ -123,6 +126,46 @@ export function importIndexedDir ( } } +// Fast path for the regular virtual store: write directly into newDir, but +// only when we can create it exclusively. A successful exclusive mkdir proves +// no other process is importing the same package concurrently, so the direct +// (non-atomic) write is safe. If newDir already exists — a concurrent importer +// or a stale partial directory from an interrupted import — this returns false +// so the caller falls back to the staging path, which builds a complete +// directory in a private temp dir and atomically renames it into place. +// +// Destructively emptying a directory another process may be populating could +// otherwise leave a partial package behind: if the surviving files include the +// package.json completion marker, every later install treats the broken +// directory as complete and never repairs it. +function tryExclusiveImport ( + importer: Importer, + newDir: string, + filenames: Map +): boolean { + fs.mkdirSync(path.dirname(newDir), { recursive: true }) + try { + fs.mkdirSync(newDir) + } catch (err) { + if (util.types.isNativeError(err) && 'code' in err && err.code === 'EEXIST') return false + throw err + } + // We exclusively created newDir, so no other process writes into it directly + // (concurrent importers see EEXIST above and take the staging path). The + // directory is ours: on failure we remove our partial result before falling + // back to staging, so the next attempt — including this process's own method + // fallbacks (clone → hardlink → copy) — can fast-path again. + try { + tryImportIndexedDir(importer, newDir, filenames) + return true + } catch { + try { + rimrafSync(newDir) + } catch {} // eslint-disable-line:no-empty + return false + } +} + function allFilesMatch (dir: string, filenames: Map): boolean { for (const [f, src] of filenames) { const target = path.join(dir, f) diff --git a/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts b/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts index 731451019d..bcf15bae34 100644 --- a/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts +++ b/fs/indexed-pkg-importer/test/importIndexedDir.race.test.ts @@ -88,6 +88,62 @@ test('fast-path failure does not delete directory populated by another process', expect(fs.existsSync(path.join(newDir, 'index.js'))).toBe(true) }) +test('regular path writes directly when the target does not exist (cold install)', () => { + const tmp = tempDir() + const srcDir = path.join(tmp, 'src') + fs.mkdirSync(srcDir, { recursive: true }) + const srcPkgJson = path.join(srcDir, 'package.json') + const srcIndex = path.join(srcDir, 'index.js') + fs.writeFileSync(srcPkgJson, '{"name":"pkg"}') + fs.writeFileSync(srcIndex, 'content') + + const newDir = path.join(tmp, 'dest') + const filenames = new Map([ + ['index.js', srcIndex], + ['package.json', srcPkgJson], + ]) + + importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: fs.copyFileSync }, newDir, filenames, {}) + + expect(fs.existsSync(path.join(newDir, 'index.js'))).toBe(true) + expect(fs.existsSync(path.join(newDir, 'package.json'))).toBe(true) + // A cold install exclusively creates the directory and writes directly, + // so no staging rename is needed. + expect(renameOverwriteSyncMock).not.toHaveBeenCalled() +}) + +test('regular path stages instead of destructively emptying a pre-existing directory', () => { + const tmp = tempDir() + const srcDir = path.join(tmp, 'src') + fs.mkdirSync(srcDir, { recursive: true }) + const srcPkgJson = path.join(srcDir, 'package.json') + const srcLicense = path.join(srcDir, 'LICENSE') + fs.writeFileSync(srcPkgJson, '{"name":"pkg"}') + fs.writeFileSync(srcLicense, 'MIT') + + const newDir = path.join(tmp, 'dest') + // A stale/partial directory left by an interrupted or concurrent importer: + // the completion marker (package.json) is present but LICENSE is missing. + fs.mkdirSync(newDir, { recursive: true }) + fs.writeFileSync(path.join(newDir, 'package.json'), '{"name":"pkg"}') + + const filenames = new Map([ + ['LICENSE', srcLicense], + ['package.json', srcPkgJson], + ]) + + importIndexedDir({ importFile: fs.copyFileSync, importFileAtomic: fs.copyFileSync }, newDir, filenames, {}) + + // The existing directory must not be emptied in place: a concurrent importer + // could be reading it. The complete package is built in a temp stage and + // swapped in atomically via renameOverwriteSync. + expect(renameOverwriteSyncMock).toHaveBeenCalledTimes(1) + const [stage, dest] = renameOverwriteSyncMock.mock.calls[0] as [string, string] + expect(dest).toBe(newDir) + expect(fs.existsSync(path.join(stage, 'LICENSE'))).toBe(true) + expect(fs.existsSync(path.join(stage, 'package.json'))).toBe(true) +}) + test('safeToSkip creates dir when target does not exist', () => { const tmp = tempDir() const srcFile = path.join(tmp, 'src', 'index.js')