perf: skip SHA-512 rehashing and use direct file write for registry files

The worker now writes files from the registry using the pre-computed
digest from the server, skipping:
- crypto.hash('sha512', buffer) for every file (33K hashes on 287MB)
- statSync to check if file exists first
- temp file + rename for atomic write

Instead, uses writeFileSync with O_CREAT|O_EXCL (flag: 'wx'), which
creates the file atomically or throws EEXIST if it already exists; the
worker catches and ignores EEXIST and rethrows any other error.

Also exports contentPathFromHex from @pnpm/store.cafs.

Benchmark: 14.8s (down from 16.2s, baseline 22s)
This commit is contained in:
Zoltan Kochan
2026-04-09 01:55:47 +02:00
parent 47bf860189
commit e015cee529
2 changed files with 22 additions and 9 deletions

View File

@@ -36,6 +36,7 @@ export { normalizeBundledManifest }
export {
buildFileMapsFromIndex,
checkPkgFilesIntegrity,
contentPathFromHex,
type FilesIndex,
type FileType,
getFilePathByModeInCafs,

View File

@@ -505,11 +505,6 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
const https = await import('node:https')
const { URL } = await import('node:url')
if (!cafsCache.has(message.storeDir)) {
cafsCache.set(message.storeDir, createCafs(message.storeDir, { cafsLocker }))
}
const cafs = cafsCache.get(message.storeDir)!
// Send HTTP request to /v1/files
const url = new URL('/v1/files', message.registryUrl)
const requestFn = url.protocol === 'https:' ? https.request : http.request
@@ -540,16 +535,20 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
const jsonLen = responseBuffer.readUInt32BE(offset)
offset += 4 + jsonLen
// Read file entries
// Read file entries and write directly to CAFS using known digests.
// Unlike addFile(), this skips SHA-512 hashing (server already computed it)
// and uses a simple exclusive-create write (no stat check, no temp+rename).
const END_MARKER = Buffer.alloc(64, 0)
let filesWritten = 0
const { contentPathFromHex } = await import('@pnpm/store.cafs')
while (offset < responseBuffer.length) {
const possibleEnd = responseBuffer.subarray(offset, offset + 64)
if (possibleEnd.length === 64 && possibleEnd.equals(END_MARKER)) break
// Digest: 64 bytes → hex
offset += 64 // skip digest (we don't need it — addFile computes its own)
// Digest: 64 bytes raw binary → hex
const digest = responseBuffer.subarray(offset, offset + 64).toString('hex')
offset += 64
// Size: 4 bytes
const size = responseBuffer.readUInt32BE(offset)
@@ -563,7 +562,20 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
const content = responseBuffer.subarray(offset, offset + size)
offset += size
cafs.addFile(Buffer.from(content), executable ? 0o755 : 0o644)
// Write directly using pre-computed digest — no rehashing
const fileType = executable ? 'exec' : 'nonexec'
const relPath = contentPathFromHex(fileType, digest)
const fullPath = path.join(message.storeDir, relPath)
const dir = path.dirname(fullPath)
try {
fs.mkdirSync(dir, { recursive: true })
fs.writeFileSync(fullPath, content, { flag: 'wx', mode: executable ? 0o755 : 0o644 })
} catch (err: unknown) {
// EEXIST = file already exists (from dedup or concurrent write) — fine
if (!(err instanceof Error && 'code' in err && (err as NodeJS.ErrnoException).code === 'EEXIST')) {
throw err
}
}
filesWritten++
}