perf: skip SHA-512 rehashing and use direct file write for registry files

The worker now writes files from the registry using the pre-computed digest from the server, skipping: - crypto.hash('sha512', buffer) for every file (33K hashes on 287MB) - statSync to check if file exists first - temp file + rename for atomic write Instead, uses writeFileSync with O_CREAT|O_EXCL (flag: 'wx') which creates the file atomically or fails silently if it already exists. Also exports contentPathFromHex from @pnpm/store.cafs. Benchmark: 14.8s (down from 16.2s, baseline 22s)
2026-04-11 10:40:53 -04:00 · 2026-04-09 01:55:47 +02:00
parent 47bf860189
commit e015cee529
2 changed files with 22 additions and 9 deletions
--- a/store/cafs/src/index.ts
+++ b/store/cafs/src/index.ts
@@ -36,6 +36,7 @@ export { normalizeBundledManifest }
 export {
  buildFileMapsFromIndex,
  checkPkgFilesIntegrity,
+  contentPathFromHex,
  type FilesIndex,
  type FileType,
  getFilePathByModeInCafs,
--- a/worker/src/start.ts
+++ b/worker/src/start.ts
@@ -505,11 +505,6 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
  const https = await import('node:https')
  const { URL } = await import('node:url')

-  if (!cafsCache.has(message.storeDir)) {
-    cafsCache.set(message.storeDir, createCafs(message.storeDir, { cafsLocker }))
-  }
-  const cafs = cafsCache.get(message.storeDir)!
-
  // Send HTTP request to /v1/files
  const url = new URL('/v1/files', message.registryUrl)
  const requestFn = url.protocol === 'https:' ? https.request : http.request
@@ -540,16 +535,20 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
  const jsonLen = responseBuffer.readUInt32BE(offset)
  offset += 4 + jsonLen

-  // Read file entries
+  // Read file entries and write directly to CAFS using known digests.
+  // Unlike addFile(), this skips SHA-512 hashing (server already computed it)
+  // and uses a simple exclusive-create write (no stat check, no temp+rename).
  const END_MARKER = Buffer.alloc(64, 0)
  let filesWritten = 0
+  const { contentPathFromHex } = await import('@pnpm/store.cafs')

  while (offset < responseBuffer.length) {
    const possibleEnd = responseBuffer.subarray(offset, offset + 64)
    if (possibleEnd.length === 64 && possibleEnd.equals(END_MARKER)) break

-    // Digest: 64 bytes → hex
-    offset += 64 // skip digest (we don't need it — addFile computes its own)
+    // Digest: 64 bytes raw binary → hex
+    const digest = responseBuffer.subarray(offset, offset + 64).toString('hex')
+    offset += 64

    // Size: 4 bytes
    const size = responseBuffer.readUInt32BE(offset)
@@ -563,7 +562,20 @@ async function fetchAndWriteCafs (message: FetchAndWriteCafsMessage): Promise<{
    const content = responseBuffer.subarray(offset, offset + size)
    offset += size

-    cafs.addFile(Buffer.from(content), executable ? 0o755 : 0o644)
+    // Write directly using pre-computed digest — no rehashing
+    const fileType = executable ? 'exec' : 'nonexec'
+    const relPath = contentPathFromHex(fileType, digest)
+    const fullPath = path.join(message.storeDir, relPath)
+    const dir = path.dirname(fullPath)
+    try {
+      fs.mkdirSync(dir, { recursive: true })
+      fs.writeFileSync(fullPath, content, { flag: 'wx', mode: executable ? 0o755 : 0o644 })
+    } catch (err: unknown) {
+      // EEXIST = file already exists (from dedup or concurrent write) — fine
+      if (!(err instanceof Error && 'code' in err && (err as NodeJS.ErrnoException).code === 'EEXIST')) {
+        throw err
+      }
+    }
    filesWritten++
  }