mirror of
https://github.com/pnpm/pnpm.git
synced 2026-04-11 10:40:53 -04:00
perf: skip SHA-512 rehashing and use direct file write for registry files
The worker now writes files from the registry using the pre-computed
digest from the server, skipping:
- crypto.hash('sha512', buffer) for every file (33K hashes on 287MB)
- statSync to check if file exists first
- temp file + rename for atomic write
Instead, uses writeFileSync with O_CREAT|O_EXCL (flag: 'wx') which
creates the file atomically or fails silently if it already exists.
Also exports contentPathFromHex from @pnpm/store.cafs.
Benchmark: 14.8s (down from 16.2s, baseline 22s)
This commit is contained in:
@@ -36,6 +36,7 @@ export { normalizeBundledManifest }
|
||||
export {
|
||||
buildFileMapsFromIndex,
|
||||
checkPkgFilesIntegrity,
|
||||
contentPathFromHex,
|
||||
type FilesIndex,
|
||||
type FileType,
|
||||
getFilePathByModeInCafs,
|
||||
|
||||
@@ -505,11 +505,6 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
|
||||
const https = await import('node:https')
|
||||
const { URL } = await import('node:url')
|
||||
|
||||
if (!cafsCache.has(message.storeDir)) {
|
||||
cafsCache.set(message.storeDir, createCafs(message.storeDir, { cafsLocker }))
|
||||
}
|
||||
const cafs = cafsCache.get(message.storeDir)!
|
||||
|
||||
// Send HTTP request to /v1/files
|
||||
const url = new URL('/v1/files', message.registryUrl)
|
||||
const requestFn = url.protocol === 'https:' ? https.request : http.request
|
||||
@@ -540,16 +535,20 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
|
||||
const jsonLen = responseBuffer.readUInt32BE(offset)
|
||||
offset += 4 + jsonLen
|
||||
|
||||
// Read file entries
|
||||
// Read file entries and write directly to CAFS using known digests.
|
||||
// Unlike addFile(), this skips SHA-512 hashing (server already computed it)
|
||||
// and uses a simple exclusive-create write (no stat check, no temp+rename).
|
||||
const END_MARKER = Buffer.alloc(64, 0)
|
||||
let filesWritten = 0
|
||||
const { contentPathFromHex } = await import('@pnpm/store.cafs')
|
||||
|
||||
while (offset < responseBuffer.length) {
|
||||
const possibleEnd = responseBuffer.subarray(offset, offset + 64)
|
||||
if (possibleEnd.length === 64 && possibleEnd.equals(END_MARKER)) break
|
||||
|
||||
// Digest: 64 bytes → hex
|
||||
offset += 64 // skip digest (we don't need it — addFile computes its own)
|
||||
// Digest: 64 bytes raw binary → hex
|
||||
const digest = responseBuffer.subarray(offset, offset + 64).toString('hex')
|
||||
offset += 64
|
||||
|
||||
// Size: 4 bytes
|
||||
const size = responseBuffer.readUInt32BE(offset)
|
||||
@@ -563,7 +562,20 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
|
||||
const content = responseBuffer.subarray(offset, offset + size)
|
||||
offset += size
|
||||
|
||||
cafs.addFile(Buffer.from(content), executable ? 0o755 : 0o644)
|
||||
// Write directly using pre-computed digest — no rehashing
|
||||
const fileType = executable ? 'exec' : 'nonexec'
|
||||
const relPath = contentPathFromHex(fileType, digest)
|
||||
const fullPath = path.join(message.storeDir, relPath)
|
||||
const dir = path.dirname(fullPath)
|
||||
try {
|
||||
fs.mkdirSync(dir, { recursive: true })
|
||||
fs.writeFileSync(fullPath, content, { flag: 'wx', mode: executable ? 0o755 : 0o644 })
|
||||
} catch (err: unknown) {
|
||||
// EEXIST = file already exists (from dedup or concurrent write) — fine
|
||||
if (!(err instanceof Error && 'code' in err && (err as NodeJS.ErrnoException).code === 'EEXIST')) {
|
||||
throw err
|
||||
}
|
||||
}
|
||||
filesWritten++
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user