mirror of
https://github.com/pnpm/pnpm.git
synced 2026-04-27 18:46:18 -04:00
perf(cafs): optimize hot path string operations (#11086)
* perf(cafs): optimize hot path string operations
Replace path.join with string concatenation in contentPathFromHex and
getFilePathByModeInCafs. These functions are called ~30k times per
install and the simpler string operations avoid path.join's argument
validation overhead.
Increase gunzipSync chunk size from default 16KB to 128KB for faster
tarball decompression with fewer zlib iterations.
* refactor: remove dead Buffer.isBuffer check in tarball path
tarballBuffer is typed as Buffer, so the isBuffer/Buffer.from
fallback was unreachable dead code.
* docs: add comments explaining path.join bypass and chunkSize choice
Address review feedback:
- Explain why string concat is used instead of path.join in CAS hot path
- Document why 128KB chunkSize was chosen (microbenchmarks, diminishing
returns at larger sizes, bounded memory cost)
* fix: cspell — use 'Benchmarks' instead of 'Microbenchmarks'
* fix(cafs): restore Buffer.isBuffer check for worker thread compatibility
The structured clone algorithm converts Buffer to Uint8Array when sent
via postMessage to worker threads. parseTarball relies on
Buffer.prototype.toString('utf8', ...) which doesn't exist on
Uint8Array — Uint8Array.toString() returns comma-separated decimal
values, causing parseOctal to misparse tar headers.
This commit is contained in:
6
.changeset/perf-cafs-hot-path-strings.md
Normal file
6
.changeset/perf-cafs-hot-path-strings.md
Normal file
@@ -0,0 +1,6 @@
|
||||
---
|
||||
"@pnpm/store.cafs": patch
|
||||
"pnpm": patch
|
||||
---
|
||||
|
||||
Optimize hot path string operations in content-addressable store: replace `path.join` with string concatenation in `contentPathFromHex` and `getFilePathByModeInCafs` (~30k calls per install), and increase `gunzipSync` chunk size to 128KB for fewer buffer allocations during tarball decompression.
|
||||
@@ -18,7 +18,19 @@ export function addFilesFromTarball (
|
||||
readManifest?: boolean
|
||||
): AddToStoreResult {
|
||||
const ignore = _ignore ?? (() => false)
|
||||
const tarContent = isGzip(tarballBuffer) ? gunzipSync(tarballBuffer) : (Buffer.isBuffer(tarballBuffer) ? tarballBuffer : Buffer.from(tarballBuffer))
|
||||
// chunkSize 128KB (8x the Node.js default of 16KB) reduces the number of
|
||||
// internal buffer allocations and copies during decompression. Benchmarks
|
||||
// showed ~2.3x faster decompress at 128KB. Larger values (256KB+) showed
|
||||
// diminishing returns while increasing per-call memory pressure. A fixed size
|
||||
// is fine here: npm tarballs are typically small enough that 128KB covers most
|
||||
// output in very few chunks, and the cost is bounded (one zlib stream per tarball).
|
||||
// When called from a worker thread, the buffer arrives as a Uint8Array
|
||||
// (structured clone converts Buffer → Uint8Array). parseTarball relies on
|
||||
// Buffer.prototype.toString('utf8', ...) so we must ensure it's a Buffer.
|
||||
// gunzipSync already returns a Buffer, so only the non-gzip path needs this.
|
||||
const tarContent = isGzip(tarballBuffer)
|
||||
? gunzipSync(tarballBuffer, { chunkSize: 128 * 1024 })
|
||||
: (Buffer.isBuffer(tarballBuffer) ? tarballBuffer : Buffer.from(tarballBuffer))
|
||||
const { files } = parseTarball(tarContent)
|
||||
const filesIndex = new Map() as FilesIndex
|
||||
let manifestBuffer: Buffer | undefined
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import path from 'node:path'
|
||||
|
||||
const SEP = path.sep
|
||||
|
||||
/**
|
||||
* Checks if a file mode has any executable permissions set.
|
||||
*
|
||||
@@ -23,11 +25,14 @@ export function getFilePathByModeInCafs (
|
||||
mode: number
|
||||
): string {
|
||||
const fileType = modeIsExecutable(mode) ? 'exec' : 'nonexec'
|
||||
return path.join(storeDir, contentPathFromHex(fileType, hexDigest))
|
||||
return `${storeDir}${SEP}${contentPathFromHex(fileType, hexDigest)}`
|
||||
}
|
||||
|
||||
export function contentPathFromHex (fileType: FileType, hex: string): string {
|
||||
const p = path.join('files', hex.slice(0, 2), hex.slice(2))
|
||||
// Using template strings with path.sep instead of path.join() for performance.
|
||||
// This is a hot path called ~30k times per cold install; avoiding path.join
|
||||
// saves ~30ms per install by eliminating per-call argument validation overhead.
|
||||
const p = `files${SEP}${hex.slice(0, 2)}${SEP}${hex.slice(2)}`
|
||||
switch (fileType) {
|
||||
case 'exec':
|
||||
return `${p}-exec`
|
||||
|
||||
Reference in New Issue
Block a user