From e2e0a321b3bcc566afd4d2fbfd4f0aeb2e5ec374 Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Sat, 24 Jan 2026 21:41:11 +0100 Subject: [PATCH] perf: optimize how the integrities of files in the CAFS are stored (#10504) --- .changeset/bright-digests-store.md | 12 ++ .changeset/pink-webs-clap.md | 5 + crypto/integrity/README.md | 47 ++++++++ crypto/integrity/package.json | 47 ++++++++ crypto/integrity/src/index.ts | 32 +++++ crypto/integrity/test/index.ts | 110 ++++++++++++++++++ crypto/integrity/test/tsconfig.json | 18 +++ crypto/integrity/tsconfig.json | 16 +++ crypto/integrity/tsconfig.lint.json | 8 ++ cspell.json | 1 + exec/plugin-commands-rebuild/test/index.ts | 2 +- .../daemon/src/createFuseHandlers.ts | 2 +- pkg-manager/core/test/install/patch.ts | 48 ++++---- pkg-manager/core/test/install/sideEffects.ts | 16 +-- pnpm-lock.yaml | 28 ++++- reviewing/license-scanner/src/getPkgInfo.ts | 6 +- store/cafs-types/src/index.ts | 9 +- store/cafs/package.json | 4 +- store/cafs/src/checkPkgFilesIntegrity.ts | 73 +++++++----- store/cafs/src/getFilePathInCafs.ts | 19 +-- store/cafs/src/index.ts | 20 ++-- store/cafs/src/readManifestFromStore.ts | 2 +- store/cafs/src/writeBufferToCafs.ts | 12 +- store/cafs/test/index.ts | 8 +- store/cafs/test/writeBufferToCafs.test.ts | 5 +- store/cafs/tsconfig.json | 6 + .../src/storeController/prune.ts | 7 +- .../src/findHash.ts | 17 ++- store/plugin-commands-store/package.json | 1 + .../src/storeStatus/index.ts | 13 ++- store/plugin-commands-store/tsconfig.json | 3 + worker/package.json | 1 + worker/src/start.ts | 40 ++++--- worker/tsconfig.json | 3 + 34 files changed, 499 insertions(+), 142 deletions(-) create mode 100644 .changeset/bright-digests-store.md create mode 100644 .changeset/pink-webs-clap.md create mode 100644 crypto/integrity/README.md create mode 100644 crypto/integrity/package.json create mode 100644 crypto/integrity/src/index.ts create mode 100644 crypto/integrity/test/index.ts create mode 100644 
crypto/integrity/test/tsconfig.json create mode 100644 crypto/integrity/tsconfig.json create mode 100644 crypto/integrity/tsconfig.lint.json diff --git a/.changeset/bright-digests-store.md b/.changeset/bright-digests-store.md new file mode 100644 index 0000000000..b83eb743b8 --- /dev/null +++ b/.changeset/bright-digests-store.md @@ -0,0 +1,12 @@ +--- +"@pnpm/cafs-types": major +"@pnpm/store.cafs": major +"@pnpm/worker": major +"@pnpm/package-store": major +"@pnpm/plugin-commands-store-inspecting": major +"@pnpm/license-scanner": major +"@pnpm/modules-mounter": major +"pnpm": major +--- + +Optimized index file format to store the hash algorithm once per index file instead of repeating it for every file entry. Each file entry now stores only the hex digest instead of the full integrity string (`<algorithm>-<base64hash>`). Using hex format improves performance since file paths in the content-addressable store use hex representation, eliminating base64-to-hex conversion during path lookups. diff --git a/.changeset/pink-webs-clap.md b/.changeset/pink-webs-clap.md new file mode 100644 index 0000000000..425755c4c6 --- /dev/null +++ b/.changeset/pink-webs-clap.md @@ -0,0 +1,5 @@ +--- +"@pnpm/crypto.integrity": major +--- + +Initial release. diff --git a/crypto/integrity/README.md b/crypto/integrity/README.md new file mode 100644 index 0000000000..3c16ff48a6 --- /dev/null +++ b/crypto/integrity/README.md @@ -0,0 +1,47 @@ +# @pnpm/crypto.integrity + +> Parse and format integrity strings + +Utilities for working with single-hash integrity strings in the format `algorithm-base64hash` (e.g., `sha512-abc123...`). This is the format used in pnpm lockfiles. 
+ +## Installation + +```sh +pnpm add @pnpm/crypto.integrity +``` + +## Usage + +```ts +import { parseIntegrity } from '@pnpm/crypto.integrity' + +const { algorithm, hexDigest } = parseIntegrity('sha512-9/u6bgY2+JDlb7vzKD5STG+jIErimDgtYkdB0NxmODJuKCxBvl5CVNiCB3LFUYosWowMf37aGVlKfrU5RT4e1w==') + +console.log(algorithm) // 'sha512' +console.log(hexDigest) // 'f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254d8820772c5518a2c5a8c0c7f7eda19594a7eb539453e1ed7' +``` + +## API + +### `parseIntegrity(integrity: string): ParsedIntegrity` + +Parses an integrity string and returns the algorithm and hex-encoded digest. + +Throws `PnpmError` with code `INVALID_INTEGRITY` if: +- The format is invalid (must be `algorithm-base64hash`) +- The base64 hash decodes to an empty digest + +### `formatIntegrity(algorithm: string, hexDigest: string): string` + +Formats a hex digest into an integrity string. + +```ts +import { formatIntegrity } from '@pnpm/crypto.integrity' + +const integrity = formatIntegrity('sha512', 'f7fbba6e...') +// 'sha512-9/u6bgY2+JDlb7vzKD5STG+...' 
+``` + +## License + +MIT diff --git a/crypto/integrity/package.json b/crypto/integrity/package.json new file mode 100644 index 0000000000..e386bad0c1 --- /dev/null +++ b/crypto/integrity/package.json @@ -0,0 +1,47 @@ +{ + "name": "@pnpm/crypto.integrity", + "version": "1100.0.0-0", + "description": "Parse and validate integrity strings", + "keywords": [ + "pnpm", + "pnpm11", + "crypto", + "integrity" + ], + "license": "MIT", + "funding": "https://opencollective.com/pnpm", + "repository": "https://github.com/pnpm/pnpm/tree/main/crypto/integrity", + "homepage": "https://github.com/pnpm/pnpm/tree/main/crypto/integrity#readme", + "bugs": { + "url": "https://github.com/pnpm/pnpm/issues" + }, + "type": "module", + "main": "lib/index.js", + "types": "lib/index.d.ts", + "exports": { + ".": "./lib/index.js" + }, + "files": [ + "lib", + "!*.map" + ], + "scripts": { + "lint": "eslint \"src/**/*.ts\" \"test/**/*.ts\"", + "_test": "cross-env NODE_OPTIONS=\"$NODE_OPTIONS --experimental-vm-modules\" jest", + "test": "pnpm run compile && pnpm run _test", + "prepublishOnly": "pnpm run compile", + "compile": "tsgo --build && pnpm run lint --fix" + }, + "dependencies": { + "@pnpm/error": "workspace:*" + }, + "devDependencies": { + "@pnpm/crypto.integrity": "workspace:*" + }, + "engines": { + "node": ">=20.19" + }, + "jest": { + "preset": "@pnpm/jest-config" + } +} diff --git a/crypto/integrity/src/index.ts b/crypto/integrity/src/index.ts new file mode 100644 index 0000000000..981723114f --- /dev/null +++ b/crypto/integrity/src/index.ts @@ -0,0 +1,32 @@ +import { PnpmError } from '@pnpm/error' + +// Matches the integrity format "algo-base64hash" +const INTEGRITY_REGEX = /^([^-]+)-([a-z0-9+/=]+)$/i + +export interface ParsedIntegrity { + algorithm: string + hexDigest: string +} + +/** + * Parses an integrity string (e.g., "sha512-base64hash") into its components. 
+ * @throws PnpmError if the integrity format is invalid + */ +export function parseIntegrity (integrity: string): ParsedIntegrity { + const match = integrity.match(INTEGRITY_REGEX) + if (!match) { + throw new PnpmError('INVALID_INTEGRITY', `Invalid integrity format: expected "algo-base64hash", got "${integrity}"`) + } + const hexDigest = Buffer.from(match[2], 'base64').toString('hex') + if (hexDigest.length === 0) { + throw new PnpmError('INVALID_INTEGRITY', 'Invalid integrity: base64 hash decoded to empty digest') + } + return { algorithm: match[1], hexDigest } +} + +/** + * Formats a hex digest into an integrity string (e.g., "sha512-base64hash"). + */ +export function formatIntegrity (algorithm: string, hexDigest: string): string { + return `${algorithm}-${Buffer.from(hexDigest, 'hex').toString('base64')}` +} diff --git a/crypto/integrity/test/index.ts b/crypto/integrity/test/index.ts new file mode 100644 index 0000000000..6082328d13 --- /dev/null +++ b/crypto/integrity/test/index.ts @@ -0,0 +1,110 @@ +import { formatIntegrity, parseIntegrity } from '@pnpm/crypto.integrity' + +describe('parseIntegrity', () => { + it('parses a valid sha512 integrity string', () => { + // "hello" hashed with sha512, base64 encoded + const integrity = 'sha512-9/u6bgY2+JDlb7vzKD5STG+jIErimDgtYkdB0NxmODJuKCxBvl5CVNiCB3LFUYosWowMf37aGVlKfrU5RT4e1w==' + const result = parseIntegrity(integrity) + expect(result.algorithm).toBe('sha512') + expect(result.hexDigest).toBe('f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254d8820772c5518a2c5a8c0c7f7eda19594a7eb539453e1ed7') + }) + + it('parses a valid sha256 integrity string', () => { + // "hello" hashed with sha256, base64 encoded + const integrity = 'sha256-LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=' + const result = parseIntegrity(integrity) + expect(result.algorithm).toBe('sha256') + expect(result.hexDigest).toBe('2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824') + }) + + it('parses a valid 
sha1 integrity string', () => { + // "hello" hashed with sha1, base64 encoded + const integrity = 'sha1-qvTGHdzF6KLavt4PO0gs2a6pQ00=' + const result = parseIntegrity(integrity) + expect(result.algorithm).toBe('sha1') + expect(result.hexDigest).toBe('aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d') + }) + + it('handles algorithms with numbers', () => { + const integrity = 'sha384-OLBgp1GsljhM2TJ+sbHjaiH9txEUvgdDTAzHv2P24donTt6/529l+9Ua0vFImLlb' + const result = parseIntegrity(integrity) + expect(result.algorithm).toBe('sha384') + expect(result.hexDigest).toHaveLength(96) // 384 bits = 48 bytes = 96 hex chars + }) + + it('is case-insensitive for base64 characters', () => { + // Same hash but with mixed case (valid base64) + const integrity = 'sha256-LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=' + const result = parseIntegrity(integrity) + expect(result.algorithm).toBe('sha256') + }) + + it('throws on missing algorithm', () => { + expect(() => parseIntegrity('LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=')) + .toThrow('Invalid integrity format') + }) + + it('throws on empty string', () => { + expect(() => parseIntegrity('')) + .toThrow('Invalid integrity format') + }) + + it('throws on missing hash', () => { + expect(() => parseIntegrity('sha256-')) + .toThrow('Invalid integrity format') + }) + + it('throws on invalid base64 characters', () => { + expect(() => parseIntegrity('sha256-invalid!@#$%')) + .toThrow('Invalid integrity format') + }) + + it('throws on multiple dashes in algorithm', () => { + // The regex requires algorithm to have no dashes (uses [^-]+) + expect(() => parseIntegrity('sha-256-LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=')) + .toThrow('Invalid integrity format') + }) + + it('throws when base64 decodes to empty', () => { + // Padding-only base64 decodes to empty buffer + expect(() => parseIntegrity('sha256-====')) + .toThrow('base64 hash decoded to empty digest') + }) + + it('handles base64 without padding', () => { + // Some systems omit padding + 
const integrity = 'sha256-LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ' + const result = parseIntegrity(integrity) + expect(result.algorithm).toBe('sha256') + // Node's Buffer.from handles missing padding gracefully + expect(result.hexDigest).toBeTruthy() + }) + + it('handles base64 special characters (+ and /)', () => { + const integrity = 'sha512-abc+def/ghi=' + const result = parseIntegrity(integrity) + expect(result.algorithm).toBe('sha512') + expect(result.hexDigest).toBeTruthy() + }) +}) + +describe('formatIntegrity', () => { + it('formats a sha512 hex digest to integrity string', () => { + const hexDigest = 'f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254d8820772c5518a2c5a8c0c7f7eda19594a7eb539453e1ed7' + const result = formatIntegrity('sha512', hexDigest) + expect(result).toBe('sha512-9/u6bgY2+JDlb7vzKD5STG+jIErimDgtYkdB0NxmODJuKCxBvl5CVNiCB3LFUYosWowMf37aGVlKfrU5RT4e1w==') + }) + + it('formats a sha256 hex digest to integrity string', () => { + const hexDigest = '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824' + const result = formatIntegrity('sha256', hexDigest) + expect(result).toBe('sha256-LPJNul+wow4m6DsqxbninhsWHlwfp0JecwQzYpOLmCQ=') + }) + + it('roundtrips with parseIntegrity', () => { + const original = 'sha512-9/u6bgY2+JDlb7vzKD5STG+jIErimDgtYkdB0NxmODJuKCxBvl5CVNiCB3LFUYosWowMf37aGVlKfrU5RT4e1w==' + const { algorithm, hexDigest } = parseIntegrity(original) + const formatted = formatIntegrity(algorithm, hexDigest) + expect(formatted).toBe(original) + }) +}) diff --git a/crypto/integrity/test/tsconfig.json b/crypto/integrity/test/tsconfig.json new file mode 100644 index 0000000000..67ce5e1d0e --- /dev/null +++ b/crypto/integrity/test/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../tsconfig.json", + "compilerOptions": { + "noEmit": false, + "outDir": "../node_modules/.test.lib", + "rootDir": "..", + "isolatedModules": true + }, + "include": [ + "**/*.ts", + "../../../__typings__/**/*.d.ts" + ], + 
"references": [ + { + "path": ".." + } + ] +} diff --git a/crypto/integrity/tsconfig.json b/crypto/integrity/tsconfig.json new file mode 100644 index 0000000000..019cba19e7 --- /dev/null +++ b/crypto/integrity/tsconfig.json @@ -0,0 +1,16 @@ +{ + "extends": "@pnpm/tsconfig", + "compilerOptions": { + "outDir": "lib", + "rootDir": "src" + }, + "include": [ + "src/**/*.ts", + "../../__typings__/**/*.d.ts" + ], + "references": [ + { + "path": "../../packages/error" + } + ] +} diff --git a/crypto/integrity/tsconfig.lint.json b/crypto/integrity/tsconfig.lint.json new file mode 100644 index 0000000000..1bbe711971 --- /dev/null +++ b/crypto/integrity/tsconfig.lint.json @@ -0,0 +1,8 @@ +{ + "extends": "./tsconfig.json", + "include": [ + "src/**/*.ts", + "test/**/*.ts", + "../../__typings__/**/*.d.ts" + ] +} diff --git a/cspell.json b/cspell.json index bfbf8fb547..5f5feebf25 100644 --- a/cspell.json +++ b/cspell.json @@ -267,6 +267,7 @@ "subdir", "subdirs", "subpkg", + "subresource", "supercede", "syml", "syncer", diff --git a/exec/plugin-commands-rebuild/test/index.ts b/exec/plugin-commands-rebuild/test/index.ts index b2b7411017..57ba5469ec 100644 --- a/exec/plugin-commands-rebuild/test/index.ts +++ b/exec/plugin-commands-rebuild/test/index.ts @@ -116,7 +116,7 @@ test('skipIfHasSideEffectsCache', async () => { [sideEffectsKey, { added: new Map([ ['foo', { - integrity: 'bar', + digest: 'bar', mode: 1, size: 1, }], diff --git a/modules-mounter/daemon/src/createFuseHandlers.ts b/modules-mounter/daemon/src/createFuseHandlers.ts index e205aec4b6..0084d432ef 100644 --- a/modules-mounter/daemon/src/createFuseHandlers.ts +++ b/modules-mounter/daemon/src/createFuseHandlers.ts @@ -53,7 +53,7 @@ export function createFuseHandlersFromLockfile (lockfile: LockfileObject, storeD cb(-1) return } - const filePathInStore = getFilePathByModeInCafs(storeDir, fileInfo.integrity, fileInfo.mode) + const filePathInStore = getFilePathByModeInCafs(storeDir, fileInfo.digest, fileInfo.mode) 
fs.open(filePathInStore, flags, (err, fd) => { if (err != null) { cb(-1) diff --git a/pkg-manager/core/test/install/patch.ts b/pkg-manager/core/test/install/patch.ts index f3e6c5454a..930068d3fb 100644 --- a/pkg-manager/core/test/install/patch.ts +++ b/pkg-manager/core/test/install/patch.ts @@ -62,12 +62,12 @@ test('patch package with exact version', async () => { const sideEffectsKey = `${ENGINE_NAME};patch=${patchFileHash}` expect(filesIndex.sideEffects!.has(sideEffectsKey)).toBeTruthy() expect(filesIndex.sideEffects!.get(sideEffectsKey)!.added).toBeTruthy() - const patchedFileIntegrity = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.integrity - expect(patchedFileIntegrity).toBeTruthy() - const originalFileIntegrity = filesIndex.files.get('index.js')!.integrity - expect(originalFileIntegrity).toBeTruthy() - // The integrity of the original file differs from the integrity of the patched file - expect(originalFileIntegrity).not.toEqual(patchedFileIntegrity) + const patchedFileDigest = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.digest + expect(patchedFileDigest).toBeTruthy() + const originalFileDigest = filesIndex.files.get('index.js')!.digest + expect(originalFileDigest).toBeTruthy() + // The digest of the original file differs from the digest of the patched file + expect(originalFileDigest).not.toEqual(patchedFileDigest) // The same with frozen lockfile rimraf('node_modules') @@ -160,12 +160,12 @@ test('patch package with version range', async () => { const sideEffectsKey = `${ENGINE_NAME};patch=${patchFileHash}` expect(filesIndex.sideEffects!.has(sideEffectsKey)).toBeTruthy() expect(filesIndex.sideEffects!.get(sideEffectsKey)!.added).toBeTruthy() - const patchedFileIntegrity = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.integrity - expect(patchedFileIntegrity).toBeTruthy() - const originalFileIntegrity = filesIndex.files.get('index.js')!.integrity - 
expect(originalFileIntegrity).toBeTruthy() - // The integrity of the original file differs from the integrity of the patched file - expect(originalFileIntegrity).not.toEqual(patchedFileIntegrity) + const patchedFileDigest = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.digest + expect(patchedFileDigest).toBeTruthy() + const originalFileDigest = filesIndex.files.get('index.js')!.digest + expect(originalFileDigest).toBeTruthy() + // The digest of the original file differs from the digest of the patched file + expect(originalFileDigest).not.toEqual(patchedFileDigest) // The same with frozen lockfile rimraf('node_modules') @@ -330,12 +330,12 @@ test('patch package when scripts are ignored', async () => { const sideEffectsKey = `${ENGINE_NAME};patch=${patchFileHash}` expect(filesIndex.sideEffects!.has(sideEffectsKey)).toBeTruthy() expect(filesIndex.sideEffects!.get(sideEffectsKey)!.added).toBeTruthy() - const patchedFileIntegrity = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.integrity - expect(patchedFileIntegrity).toBeTruthy() - const originalFileIntegrity = filesIndex.files.get('index.js')!.integrity - expect(originalFileIntegrity).toBeTruthy() - // The integrity of the original file differs from the integrity of the patched file - expect(originalFileIntegrity).not.toEqual(patchedFileIntegrity) + const patchedFileDigest = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.digest + expect(patchedFileDigest).toBeTruthy() + const originalFileDigest = filesIndex.files.get('index.js')!.digest + expect(originalFileDigest).toBeTruthy() + // The digest of the original file differs from the digest of the patched file + expect(originalFileDigest).not.toEqual(patchedFileDigest) // The same with frozen lockfile rimraf('node_modules') @@ -421,12 +421,12 @@ test('patch package when the package is not in allowBuilds list', async () => { const sideEffectsKey = `${ENGINE_NAME};patch=${patchFileHash}` 
expect(filesIndex.sideEffects!.has(sideEffectsKey)).toBeTruthy() expect(filesIndex.sideEffects!.get(sideEffectsKey)!.added).toBeTruthy() - const patchedFileIntegrity = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.integrity - expect(patchedFileIntegrity).toBeTruthy() - const originalFileIntegrity = filesIndex.files.get('index.js')!.integrity - expect(originalFileIntegrity).toBeTruthy() - // The integrity of the original file differs from the integrity of the patched file - expect(originalFileIntegrity).not.toEqual(patchedFileIntegrity) + const patchedFileDigest = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('index.js')?.digest + expect(patchedFileDigest).toBeTruthy() + const originalFileDigest = filesIndex.files.get('index.js')!.digest + expect(originalFileDigest).toBeTruthy() + // The digest of the original file differs from the digest of the patched file + expect(originalFileDigest).not.toEqual(patchedFileDigest) // The same with frozen lockfile rimraf('node_modules') diff --git a/pkg-manager/core/test/install/sideEffects.ts b/pkg-manager/core/test/install/sideEffects.ts index 03a7dc41e4..fa71f32d77 100644 --- a/pkg-manager/core/test/install/sideEffects.ts +++ b/pkg-manager/core/test/install/sideEffects.ts @@ -198,14 +198,14 @@ test('a postinstall script does not modify the original sources added to the sto id: `@pnpm/postinstall-modifies-source@1.0.0:${getIntegrity('@pnpm/postinstall-modifies-source', '1.0.0')}`, deps: {}, })}`)! 
- const patchedFileIntegrity = sideEffectEntry.added!.get('empty-file.txt')?.integrity - expect(patchedFileIntegrity).toBeTruthy() - const originalFileIntegrity = filesIndex.files.get('empty-file.txt')!.integrity - expect(originalFileIntegrity).toBeTruthy() - // The integrity of the original file differs from the integrity of the patched file - expect(originalFileIntegrity).not.toEqual(patchedFileIntegrity) + const patchedFileDigest = sideEffectEntry.added!.get('empty-file.txt')?.digest + expect(patchedFileDigest).toBeTruthy() + const originalFileDigest = filesIndex.files.get('empty-file.txt')!.digest + expect(originalFileDigest).toBeTruthy() + // The digest of the original file differs from the digest of the patched file + expect(originalFileDigest).not.toEqual(patchedFileDigest) - expect(fs.readFileSync(getFilePathByModeInCafs(opts.storeDir, originalFileIntegrity, 420), 'utf8')).toBe('') + expect(fs.readFileSync(getFilePathByModeInCafs(opts.storeDir, originalFileDigest, 420), 'utf8')).toBe('') }) test('a corrupted side-effects cache is ignored', async () => { @@ -237,7 +237,7 @@ test('a corrupted side-effects cache is ignored', async () => { expect(filesIndex.sideEffects!.get(sideEffectsKey)!.added).toBeTruthy() expect(filesIndex.sideEffects!.get(sideEffectsKey)!.added!.has('generated-by-preinstall.js')).toBeTruthy() const sideEffectFileStat = filesIndex.sideEffects!.get(sideEffectsKey)!.added!.get('generated-by-preinstall.js')! 
- const sideEffectFile = getFilePathByModeInCafs(opts.storeDir, sideEffectFileStat.integrity, sideEffectFileStat.mode) + const sideEffectFile = getFilePathByModeInCafs(opts.storeDir, sideEffectFileStat.digest, sideEffectFileStat.mode) expect(fs.existsSync(sideEffectFile)).toBeTruthy() rimraf(sideEffectFile) // we remove the side effect file to break the store diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3b7614e70c..b5aa33e86c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -2056,6 +2056,16 @@ importers: specifier: 'catalog:' version: 3.1.7 + crypto/integrity: + dependencies: + '@pnpm/error': + specifier: workspace:* + version: link:../../packages/error + devDependencies: + '@pnpm/crypto.integrity': + specifier: workspace:* + version: 'link:' + crypto/object-hasher: dependencies: object-hash: @@ -8144,6 +8154,12 @@ importers: store/cafs: dependencies: + '@pnpm/crypto.integrity': + specifier: workspace:* + version: link:../../crypto/integrity + '@pnpm/error': + specifier: workspace:* + version: link:../../packages/error '@pnpm/fetcher-base': specifier: workspace:* version: link:../../fetching/fetcher-base @@ -8168,9 +8184,6 @@ importers: rename-overwrite: specifier: 'catalog:' version: 6.0.2 - ssri: - specifier: 'catalog:' - version: 13.0.0 strip-bom: specifier: 'catalog:' version: 5.0.0 @@ -8193,9 +8206,6 @@ importers: '@types/node': specifier: 'catalog:' version: 22.15.30 - '@types/ssri': - specifier: 'catalog:' - version: 7.1.5 symlink-dir: specifier: 'catalog:' version: 7.0.0 @@ -8342,6 +8352,9 @@ importers: '@pnpm/config': specifier: workspace:* version: link:../../config/config + '@pnpm/crypto.integrity': + specifier: workspace:* + version: link:../../crypto/integrity '@pnpm/dependency-path': specifier: workspace:* version: link:../../packages/dependency-path @@ -8728,6 +8741,9 @@ importers: '@pnpm/create-cafs-store': specifier: workspace:* version: link:../store/create-cafs-store + '@pnpm/crypto.integrity': + specifier: workspace:* + version: 
link:../crypto/integrity '@pnpm/error': specifier: workspace:* version: link:../packages/error diff --git a/reviewing/license-scanner/src/getPkgInfo.ts b/reviewing/license-scanner/src/getPkgInfo.ts index d67bbedebb..07e7ddad2c 100644 --- a/reviewing/license-scanner/src/getPkgInfo.ts +++ b/reviewing/license-scanner/src/getPkgInfo.ts @@ -221,8 +221,8 @@ async function parseLicense ( * @param opts the options for reading file * @returns Promise */ -async function readLicenseFileFromCafs (storeDir: string, { integrity, mode }: PackageFileInfo): Promise { - const fileName = getFilePathByModeInCafs(storeDir, integrity, mode) +async function readLicenseFileFromCafs (storeDir: string, { digest, mode }: PackageFileInfo): Promise { + const fileName = getFilePathByModeInCafs(storeDir, digest, mode) const fileContents = await readFile(fileName) return fileContents } @@ -363,7 +363,7 @@ export async function getPkgInfo ( const packageManifestFile = packageFileIndex.get('package.json') as PackageFileInfo packageManifestDir = getFilePathByModeInCafs( opts.storeDir, - packageManifestFile.integrity, + packageManifestFile.digest, packageManifestFile.mode ) } diff --git a/store/cafs-types/src/index.ts b/store/cafs-types/src/index.ts index 94dc70cc2d..7a577773ce 100644 --- a/store/cafs-types/src/index.ts +++ b/store/cafs-types/src/index.ts @@ -1,11 +1,10 @@ -import type { IntegrityLike } from 'ssri' import type { DependencyManifest } from '@pnpm/types' export type PackageFiles = Map export interface PackageFileInfo { checkedAt?: number // Nullable for backward compatibility - integrity: string + digest: string mode: number size: number } @@ -59,7 +58,7 @@ export type FilesIndex = Map AddToStoreResult addFilesFromTarball: (buffer: Buffer) => AddToStoreResult addFile: (buffer: Buffer, mode: number) => FileWriteResult - getIndexFilePathInCafs: (integrity: string | IntegrityLike, fileType: FileType) => string - getFilePathByModeInCafs: (integrity: string | IntegrityLike, mode: number) => 
string + getIndexFilePathInCafs: (integrity: string, pkgId: string) => string + getFilePathByModeInCafs: (digest: string, mode: number) => string importPackage: ImportPackageFunction tempDir: () => Promise } diff --git a/store/cafs/package.json b/store/cafs/package.json index ceeeabca7d..c6b307e712 100644 --- a/store/cafs/package.json +++ b/store/cafs/package.json @@ -31,6 +31,8 @@ "prepublishOnly": "pnpm run compile" }, "dependencies": { + "@pnpm/crypto.integrity": "workspace:*", + "@pnpm/error": "workspace:*", "@pnpm/fetcher-base": "workspace:*", "@pnpm/graceful-fs": "workspace:*", "@pnpm/store-controller-types": "workspace:*", @@ -39,7 +41,6 @@ "is-subdir": "catalog:", "p-limit": "catalog:", "rename-overwrite": "catalog:", - "ssri": "catalog:", "strip-bom": "catalog:" }, "devDependencies": { @@ -49,7 +50,6 @@ "@pnpm/types": "workspace:*", "@types/is-gzip": "catalog:", "@types/node": "catalog:", - "@types/ssri": "catalog:", "symlink-dir": "catalog:", "tempy": "catalog:" }, diff --git a/store/cafs/src/checkPkgFilesIntegrity.ts b/store/cafs/src/checkPkgFilesIntegrity.ts index 01f5a5fb1d..86579aa8dd 100644 --- a/store/cafs/src/checkPkgFilesIntegrity.ts +++ b/store/cafs/src/checkPkgFilesIntegrity.ts @@ -1,14 +1,20 @@ +import crypto from 'crypto' import fs from 'fs' import util from 'util' +import { PnpmError } from '@pnpm/error' import { type PackageFiles, type PackageFileInfo, type SideEffects, type FilesMap } from '@pnpm/cafs-types' import gfs from '@pnpm/graceful-fs' import { type DependencyManifest } from '@pnpm/types' import rimraf from '@zkochan/rimraf' -import ssri from 'ssri' import { getFilePathByModeInCafs } from './getFilePathInCafs.js' import { parseJsonBufferSync } from './parseJson.js' import { readManifestFromStore } from './readManifestFromStore.js' +export interface Integrity { + digest: string + algorithm: string +} + // We track how many files were checked during installation. // It should be rare that a files content should be checked. 
// If it happens too frequently, something is wrong. @@ -31,6 +37,7 @@ export interface PackageFilesIndex { name?: string version?: string requiresBuild?: boolean + algo: string files: PackageFiles sideEffects?: SideEffects @@ -45,7 +52,7 @@ export function checkPkgFilesIntegrity ( // but there's a smaller chance that the same file will be checked twice // so it's probably not worth the memory (this assumption should be verified) const verifiedFilesCache = new Set() - const _checkFilesIntegrity = checkFilesIntegrity.bind(null, verifiedFilesCache, storeDir) + const _checkFilesIntegrity = checkFilesIntegrity.bind(null, verifiedFilesCache, storeDir, pkgIndex.algo) const verified = _checkFilesIntegrity(pkgIndex.files, readManifest) if (!verified.passed) return verified @@ -87,7 +94,7 @@ export function buildFileMapsFromIndex ( const filesMap: FilesMap = new Map() for (const [f, fstat] of pkgIndex.files) { - const filename = getFilePathByModeInCafs(storeDir, fstat.integrity, fstat.mode) + const filename = getFilePathByModeInCafs(storeDir, fstat.digest, fstat.mode) filesMap.set(f, filename) } @@ -99,7 +106,7 @@ export function buildFileMapsFromIndex ( if (added) { const addedFilesMap: FilesMap = new Map() for (const [f, fstat] of added) { - const filename = getFilePathByModeInCafs(storeDir, fstat.integrity, fstat.mode) + const filename = getFilePathByModeInCafs(storeDir, fstat.digest, fstat.mode) addedFilesMap.set(f, filename) } sideEffectEntry.added = addedFilesMap @@ -124,6 +131,7 @@ export function buildFileMapsFromIndex ( function checkFilesIntegrity ( verifiedFilesCache: Set, storeDir: string, + algo: string, files: PackageFiles, readManifest?: boolean ): VerifyResult { @@ -132,15 +140,15 @@ function checkFilesIntegrity ( const filesMap: FilesMap = new Map() for (const [f, fstat] of files) { - if (!fstat.integrity) { - throw new Error(`Integrity checksum is missing for ${f}`) + if (!fstat.digest) { + throw new PnpmError('MISSING_CONTENT_DIGEST', `Content digest is 
missing for ${f}`) } - const filename = getFilePathByModeInCafs(storeDir, fstat.integrity, fstat.mode) + const filename = getFilePathByModeInCafs(storeDir, fstat.digest, fstat.mode) filesMap.set(f, filename) const readFile = readManifest && f === 'package.json' if (!readFile && verifiedFilesCache.has(filename)) continue - const verifyResult = verifyFile(filename, fstat, readFile) + const verifyResult = verifyFile(filename, fstat, algo, readFile) if (readFile) { manifest = verifyResult.manifest } @@ -157,13 +165,12 @@ function checkFilesIntegrity ( } } -type FileInfo = Pick & { - integrity: string | ssri.IntegrityLike -} +type FileInfo = Pick function verifyFile ( filename: string, fstat: FileInfo, + algorithm: string, readManifest?: boolean ): Pick { const currentFile = checkFile(filename, fstat.checkedAt) @@ -173,7 +180,7 @@ function verifyFile ( rimraf.sync(filename) return { passed: false } } - return verifyFileIntegrity(filename, fstat, readManifest) + return verifyFileIntegrity(filename, { digest: fstat.digest, algorithm }, readManifest) } if (readManifest) { return { @@ -188,35 +195,39 @@ function verifyFile ( export function verifyFileIntegrity ( filename: string, - expectedFile: FileInfo, + integrity: Integrity, readManifest?: boolean ): Pick { // @ts-expect-error global['verifiedFileIntegrity']++ + let data: Buffer try { - const data = gfs.readFileSync(filename) - const passed = Boolean(ssri.checkData(data, expectedFile.integrity)) - if (!passed) { - gfs.unlinkSync(filename) - return { passed } - } else if (readManifest) { - return { - passed, - manifest: parseJsonBufferSync(data) as DependencyManifest, - } - } - return { passed } + data = gfs.readFileSync(filename) } catch (err: unknown) { - switch (util.types.isNativeError(err) && 'code' in err && err.code) { - case 'ENOENT': return { passed: false } - case 'EINTEGRITY': { - // Broken files are removed from the store - gfs.unlinkSync(filename) + if (util.types.isNativeError(err) && 'code' in err && 
err.code === 'ENOENT') { return { passed: false } } - } throw err } + let computedDigest: string + try { + computedDigest = crypto.hash(integrity.algorithm, data, 'hex') + } catch { + // Invalid algorithm (e.g., corrupted index file) - treat as verification failure + return { passed: false } + } + const passed = computedDigest === integrity.digest + if (!passed) { + gfs.unlinkSync(filename) + return { passed } + } + if (readManifest) { + return { + passed, + manifest: parseJsonBufferSync(data) as DependencyManifest, + } + } + return { passed } } function checkFile (filename: string, checkedAt?: number): { isModified: boolean, size: number } | null { diff --git a/store/cafs/src/getFilePathInCafs.ts b/store/cafs/src/getFilePathInCafs.ts index 18a0f93448..90c245fe62 100644 --- a/store/cafs/src/getFilePathInCafs.ts +++ b/store/cafs/src/getFilePathInCafs.ts @@ -1,5 +1,5 @@ import path from 'path' -import ssri, { type IntegrityLike } from 'ssri' +import { parseIntegrity } from '@pnpm/crypto.integrity' /** * Checks if a file mode has any executable permissions set. @@ -20,19 +20,20 @@ export type FileType = 'exec' | 'nonexec' export function getFilePathByModeInCafs ( storeDir: string, - integrity: string | IntegrityLike, + hexDigest: string, mode: number ): string { const fileType = modeIsExecutable(mode) ? 'exec' : 'nonexec' - return path.join(storeDir, contentPathFromIntegrity(integrity, fileType)) + return path.join(storeDir, contentPathFromHex(fileType, hexDigest)) } export function getIndexFilePathInCafs ( storeDir: string, - integrity: string | IntegrityLike, + integrity: string, pkgId: string ): string { - const hex = ssri.parse(integrity, { single: true }).hexDigest().substring(0, 64) + const { hexDigest } = parseIntegrity(integrity) + const hex = hexDigest.substring(0, 64) // Some registries allow identical content to be published under different package names or versions. 
// To accommodate this, index files are stored using both the content hash and package identifier. // This approach ensures that we can: @@ -42,14 +43,6 @@ export function getIndexFilePathInCafs ( return path.join(storeDir, `index/${path.join(hex.slice(0, 2), hex.slice(2))}-${pkgId.replace(/[\\/:*?"<>|]/g, '+')}.mpk`) } -function contentPathFromIntegrity ( - integrity: string | IntegrityLike, - fileType: FileType -): string { - const sri = ssri.parse(integrity, { single: true }) - return contentPathFromHex(fileType, sri.hexDigest()) -} - export function contentPathFromHex (fileType: FileType, hex: string): string { const p = path.join('files', hex.slice(0, 2), hex.slice(2)) switch (fileType) { diff --git a/store/cafs/src/index.ts b/store/cafs/src/index.ts index 9590214b15..cbd4c6f9f5 100644 --- a/store/cafs/src/index.ts +++ b/store/cafs/src/index.ts @@ -1,3 +1,4 @@ +import crypto from 'crypto' import { type AddToStoreResult, type FileWriteResult, @@ -7,12 +8,12 @@ import { type SideEffects, type SideEffectsDiff, } from '@pnpm/cafs-types' -import ssri from 'ssri' import { addFilesFromDir } from './addFilesFromDir.js' import { addFilesFromTarball } from './addFilesFromTarball.js' import { checkPkgFilesIntegrity, buildFileMapsFromIndex, + type Integrity, type PackageFilesIndex, type VerifyResult, } from './checkPkgFilesIntegrity.js' @@ -26,7 +27,7 @@ import { } from './getFilePathInCafs.js' import { optimisticRenameOverwrite, writeBufferToCafs } from './writeBufferToCafs.js' -export type { IntegrityLike } from 'ssri' +export const HASH_ALGORITHM = 'sha512' export { checkPkgFilesIntegrity, @@ -35,6 +36,7 @@ export { type FileType, getFilePathByModeInCafs, getIndexFilePathInCafs, + type Integrity, type PackageFileInfo, type PackageFiles, type PackageFilesIndex, @@ -56,8 +58,8 @@ export interface CafsFunctions { addFilesFromDir: (dirname: string, opts?: { files?: string[], readManifest?: boolean }) => AddToStoreResult addFilesFromTarball: (tarballBuffer: Buffer, 
readManifest?: boolean) => AddToStoreResult addFile: (buffer: Buffer, mode: number) => FileWriteResult - getIndexFilePathInCafs: (integrity: string | ssri.IntegrityLike, fileType: FileType) => string - getFilePathByModeInCafs: (integrity: string | ssri.IntegrityLike, mode: number) => string + getIndexFilePathInCafs: (integrity: string, pkgId: string) => string + getFilePathByModeInCafs: (digest: string, mode: number) => string } export function createCafs (storeDir: string, { ignoreFile, cafsLocker }: CreateCafsOpts = {}): CafsFunctions { @@ -72,7 +74,7 @@ export function createCafs (storeDir: string, { ignoreFile, cafsLocker }: Create } } -type WriteBufferToCafs = (buffer: Buffer, fileDest: string, mode: number | undefined, integrity: ssri.IntegrityLike) => { checkedAt: number, filePath: string } +type WriteBufferToCafs = (buffer: Buffer, fileDest: string, mode: number | undefined, integrity: Integrity) => { checkedAt: number, filePath: string } function addBufferToCafs ( writeBufferToCafs: WriteBufferToCafs, @@ -82,14 +84,14 @@ function addBufferToCafs ( // Calculating the integrity of the file is surprisingly fast. // 30K files are calculated in 1 second. // Hence, from a performance perspective, there is no win in fetching the package index file from the registry. - const integrity = ssri.fromData(buffer) + const digest = crypto.hash(HASH_ALGORITHM, buffer, 'hex') const isExecutable = modeIsExecutable(mode) - const fileDest = contentPathFromHex(isExecutable ? 'exec' : 'nonexec', integrity.hexDigest()) + const fileDest = contentPathFromHex(isExecutable ? 'exec' : 'nonexec', digest) const { checkedAt, filePath } = writeBufferToCafs( buffer, fileDest, isExecutable ? 
0o755 : undefined, - integrity + { digest, algorithm: HASH_ALGORITHM } ) - return { checkedAt, integrity, filePath } + return { checkedAt, filePath, digest } } diff --git a/store/cafs/src/readManifestFromStore.ts b/store/cafs/src/readManifestFromStore.ts index 2919033602..4f52a40b5f 100644 --- a/store/cafs/src/readManifestFromStore.ts +++ b/store/cafs/src/readManifestFromStore.ts @@ -7,7 +7,7 @@ import { parseJsonBufferSync } from './parseJson.js' export function readManifestFromStore (storeDir: string, pkgIndex: PackageFilesIndex): PackageManifest | undefined { const pkg = pkgIndex.files.get('package.json') if (pkg) { - const fileName = getFilePathByModeInCafs(storeDir, pkg.integrity, pkg.mode) + const fileName = getFilePathByModeInCafs(storeDir, pkg.digest, pkg.mode) return parseJsonBufferSync(gfs.readFileSync(fileName)) as PackageManifest } return undefined diff --git a/store/cafs/src/writeBufferToCafs.ts b/store/cafs/src/writeBufferToCafs.ts index 0245672871..722e315d19 100644 --- a/store/cafs/src/writeBufferToCafs.ts +++ b/store/cafs/src/writeBufferToCafs.ts @@ -3,8 +3,7 @@ import path from 'path' import workerThreads from 'worker_threads' import util from 'util' import renameOverwrite from 'rename-overwrite' -import type ssri from 'ssri' -import { verifyFileIntegrity } from './checkPkgFilesIntegrity.js' +import { type Integrity, verifyFileIntegrity } from './checkPkgFilesIntegrity.js' import { writeFile } from './writeFile.js' export function writeBufferToCafs ( @@ -13,7 +12,7 @@ export function writeBufferToCafs ( buffer: Buffer, fileDest: string, mode: number | undefined, - integrity: ssri.IntegrityLike + integrity: Integrity ): { checkedAt: number, filePath: string } { fileDest = path.join(storeDir, fileDest) if (locker.has(fileDest)) { @@ -103,11 +102,8 @@ function removeSuffix (filePath: string): string { return withoutSuffix } -function existsSame (filename: string, integrity: ssri.IntegrityLike): boolean { +function existsSame (filename: string, 
integrity: Integrity): boolean { const existingFile = fs.statSync(filename, { throwIfNoEntry: false }) if (!existingFile) return false - return verifyFileIntegrity(filename, { - size: existingFile.size, - integrity, - }).passed + return verifyFileIntegrity(filename, integrity).passed } diff --git a/store/cafs/test/index.ts b/store/cafs/test/index.ts index dbe2c57e98..04ef204b74 100644 --- a/store/cafs/test/index.ts +++ b/store/cafs/test/index.ts @@ -24,7 +24,7 @@ describe('cafs', () => { expect(pkgFile!.size).toBe(1121) expect(pkgFile!.mode).toBe(420) expect(typeof pkgFile!.checkedAt).toBe('number') - expect(pkgFile!.integrity.toString()).toBe('sha512-8xCvrlC7W3TlwXxetv5CZTi53szYhmT7tmpXF/ttNthtTR9TC7Y7WJFPmJToHaSQ4uObuZyOARdOJYNYuTSbXA==') + expect(pkgFile!.digest).toBe('f310afae50bb5b74e5c17c5eb6fe426538b9deccd88664fbb66a5717fb6d36d86d4d1f530bb63b58914f9894e81da490e2e39bb99c8e01174e258358b9349b5c') }) it('replaces an already existing file, if the integrity of it was broken', () => { @@ -35,7 +35,8 @@ describe('cafs', () => { let addFilesResult = addFiles() // Modifying the file in the store - const filePath = getFilePathByModeInCafs(storeDir, addFilesResult.filesIndex.get('foo.txt')!.integrity, 420) + const { digest } = addFilesResult.filesIndex.get('foo.txt')! 
+ const filePath = getFilePathByModeInCafs(storeDir, digest, 420) fs.appendFileSync(filePath, 'bar') addFilesResult = addFiles() @@ -152,9 +153,10 @@ describe('checkPkgFilesIntegrity()', () => { it("doesn't fail if file was removed from the store", () => { const storeDir = temporaryDirectory() expect(checkPkgFilesIntegrity(storeDir, { + algo: 'sha512', files: new Map([ ['foo', { - integrity: 'sha512-8xCvrlC7W3TlwXxetv5CZTi53szYhmT7tmpXF/ttNthtTR9TC7Y7WJFPmJToHaSQ4uObuZyOARdOJYNYuTSbXA==', + digest: 'f310afae50bb5b74e5c17c5eb6fe426538b9deccd88664fbb66a5717fb6d36d86d4d1f530bb63b58914f9894e81da490e2e39bb99c8e01174e258358b9349b5c', mode: 420, size: 10, }], diff --git a/store/cafs/test/writeBufferToCafs.test.ts b/store/cafs/test/writeBufferToCafs.test.ts index 3be396e7b6..bf4c72cd50 100644 --- a/store/cafs/test/writeBufferToCafs.test.ts +++ b/store/cafs/test/writeBufferToCafs.test.ts @@ -1,6 +1,6 @@ +import crypto from 'crypto' import fs from 'fs' import path from 'path' -import ssri from 'ssri' import { temporaryDirectory } from 'tempy' import { pathTemp, writeBufferToCafs } from '../src/writeBufferToCafs.js' @@ -11,7 +11,8 @@ describe('writeBufferToCafs', () => { const buffer = Buffer.from('abc') const fullFileDest = path.join(storeDir, fileDest) fs.writeFileSync(pathTemp(fullFileDest), 'ccc', 'utf8') - writeBufferToCafs(new Map(), storeDir, buffer, fileDest, 420, ssri.fromData(buffer)) + const digest = crypto.hash('sha512', buffer, 'hex') + writeBufferToCafs(new Map(), storeDir, buffer, fileDest, 420, { digest, algorithm: 'sha512' }) expect(fs.readFileSync(fullFileDest, 'utf8')).toBe('abc') }) }) diff --git a/store/cafs/tsconfig.json b/store/cafs/tsconfig.json index e00a610153..b2c9df6623 100644 --- a/store/cafs/tsconfig.json +++ b/store/cafs/tsconfig.json @@ -12,12 +12,18 @@ { "path": "../../__utils__/test-fixtures" }, + { + "path": "../../crypto/integrity" + }, { "path": "../../fetching/fetcher-base" }, { "path": "../../fs/graceful-fs" }, + { + "path": 
"../../packages/error" + }, { "path": "../../packages/types" }, diff --git a/store/package-store/src/storeController/prune.ts b/store/package-store/src/storeController/prune.ts index 7ad7b7ea13..87cef4df6a 100644 --- a/store/package-store/src/storeController/prune.ts +++ b/store/package-store/src/storeController/prune.ts @@ -5,7 +5,6 @@ import { readMsgpackFile } from '@pnpm/fs.msgpack-file' import { type PackageFilesIndex } from '@pnpm/store.cafs' import { globalInfo, globalWarn } from '@pnpm/logger' import rimraf from '@zkochan/rimraf' -import ssri from 'ssri' import { pruneGlobalVirtualStore } from './pruneGlobalVirtualStore.js' const BIG_ONE = BigInt(1) as unknown @@ -75,7 +74,9 @@ export async function prune ({ cacheDir, storeDir }: PruneOptions, removeAlienFi if (stat.nlink === 1 || stat.nlink === BIG_ONE) { await fs.unlink(filePath) fileCounter++ - removedHashes.add(ssri.fromHex(`${dir}${fileName}`, 'sha512').toString()) + // Store the hex digest, which matches the format stored in PackageFileInfo.digest + // The file name in the store is the hex representation of the hash (with optional -exec suffix) + removedHashes.add(`${dir}${fileName.replace(/-exec$/, '')}`) } })) })) @@ -87,7 +88,7 @@ export async function prune ({ cacheDir, storeDir }: PruneOptions, removeAlienFi const pkgFilesIndex = await readMsgpackFile(pkgIndexFilePath) const pkgJson = pkgFilesIndex.files.get('package.json') // TODO: implement prune of Node.js packages, they don't have a package.json file - if (pkgJson && removedHashes.has(pkgJson.integrity)) { + if (pkgJson && removedHashes.has(pkgJson.digest)) { await fs.unlink(pkgIndexFilePath) pkgCounter++ } diff --git a/store/plugin-commands-store-inspecting/src/findHash.ts b/store/plugin-commands-store-inspecting/src/findHash.ts index ed63ef46f2..8fad609446 100644 --- a/store/plugin-commands-store-inspecting/src/findHash.ts +++ b/store/plugin-commands-store-inspecting/src/findHash.ts @@ -44,7 +44,18 @@ export async function handler (opts: 
FindHashCommandOptions, params: string[]): throw new PnpmError('MISSING_HASH', '`pnpm find-hash` requires the hash') } - const hash = params[0] + // Convert the input hash to hex format for comparison + // Input can be either: + // - A hex string (used directly) + // - A base64 integrity string like "sha512-..." (converted to hex) + let hash = params[0] + if (hash.includes('-')) { + // Looks like an integrity string (algo-base64), extract and convert the base64 part + const base64Part = hash.split('-').slice(1).join('-') + hash = Buffer.from(base64Part, 'base64').toString('hex') + } + // Stored digests are lowercase hex, so normalize the input to lowercase + hash = hash.toLowerCase() const storeDir = await getStorePath({ pkgRoot: process.cwd(), storePath: opts.storeDir, @@ -74,7 +85,7 @@ export async function handler (opts: FindHashCommandOptions, params: string[]): if (pkgFilesIndex.files) { for (const file of pkgFilesIndex.files.values()) { - if (file?.integrity === hash) { + if (file?.digest === hash) { result.push({ name: pkgFilesIndex.name ?? 'unknown', version: pkgFilesIndex?.version ?? 'unknown', filesIndexFile: filesIndexFile.replace(indexDir, '') }) // a package is only found once. @@ -87,7 +98,7 @@ export async function handler (opts: FindHashCommandOptions, params: string[]): for (const { added } of pkgFilesIndex.sideEffects.values()) { if (!added) continue for (const file of added.values()) { - if (file?.integrity === hash) { + if (file?.digest === hash) { result.push({ name: pkgFilesIndex.name ?? 'unknown', version: pkgFilesIndex?.version ?? 'unknown', filesIndexFile: filesIndexFile.replace(indexDir, '') }) // a package is only found once. 
diff --git a/store/plugin-commands-store/package.json b/store/plugin-commands-store/package.json index 2507ec42a1..fe3aa67079 100644 --- a/store/plugin-commands-store/package.json +++ b/store/plugin-commands-store/package.json @@ -34,6 +34,7 @@ "dependencies": { "@pnpm/cli-utils": "workspace:*", "@pnpm/config": "workspace:*", + "@pnpm/crypto.integrity": "workspace:*", "@pnpm/dependency-path": "workspace:*", "@pnpm/error": "workspace:*", "@pnpm/fs.msgpack-file": "workspace:*", diff --git a/store/plugin-commands-store/src/storeStatus/index.ts b/store/plugin-commands-store/src/storeStatus/index.ts index 87bc1fda70..4a086a5540 100644 --- a/store/plugin-commands-store/src/storeStatus/index.ts +++ b/store/plugin-commands-store/src/storeStatus/index.ts @@ -1,4 +1,5 @@ import path from 'path' +import { formatIntegrity } from '@pnpm/crypto.integrity' import { getIndexFilePathInCafs, type PackageFilesIndex } from '@pnpm/store.cafs' import { getContextForSingleImporter } from '@pnpm/get-context' import { @@ -52,8 +53,16 @@ export async function storeStatus (maybeOpts: StoreStatusOptions): Promise(pkgIndexFilePath) - return (await dint.check(path.join(virtualStoreDir, dp.depPathToFilename(depPath, maybeOpts.virtualStoreDirMaxLength), 'node_modules', name), Object.fromEntries(files.entries()))) === false + const { algo, files } = await readMsgpackFile(pkgIndexFilePath) + // Transform files to dint format: { integrity: '-', size: number } + const dintFiles: Record = {} + for (const [filePath, { digest, size }] of files) { + dintFiles[filePath] = { + integrity: formatIntegrity(algo, digest), + size, + } + } + return (await dint.check(path.join(virtualStoreDir, dp.depPathToFilename(depPath, maybeOpts.virtualStoreDirMaxLength), 'node_modules', name), dintFiles)) === false }, { concurrency: 8 }) if ((reporter != null) && typeof reporter === 'function') { diff --git a/store/plugin-commands-store/tsconfig.json b/store/plugin-commands-store/tsconfig.json index af86d13b8e..f548594f64 
100644 --- a/store/plugin-commands-store/tsconfig.json +++ b/store/plugin-commands-store/tsconfig.json @@ -24,6 +24,9 @@ { "path": "../../config/normalize-registries" }, + { + "path": "../../crypto/integrity" + }, { "path": "../../exec/plugin-commands-script-runners" }, diff --git a/worker/package.json b/worker/package.json index 16bab0de94..c4e80a87bf 100644 --- a/worker/package.json +++ b/worker/package.json @@ -35,6 +35,7 @@ "dependencies": { "@pnpm/cafs-types": "workspace:*", "@pnpm/create-cafs-store": "workspace:*", + "@pnpm/crypto.integrity": "workspace:*", "@pnpm/error": "workspace:*", "@pnpm/exec.pkg-requires-build": "workspace:*", "@pnpm/fs.hard-link-dir": "workspace:*", diff --git a/worker/src/start.ts b/worker/src/start.ts index e99f9e1c0e..33e47cdbac 100644 --- a/worker/src/start.ts +++ b/worker/src/start.ts @@ -7,11 +7,13 @@ import { createCafsStore } from '@pnpm/create-cafs-store' import { pkgRequiresBuild } from '@pnpm/exec.pkg-requires-build' import { hardLinkDir } from '@pnpm/fs.hard-link-dir' import { readMsgpackFileSync, writeMsgpackFileSync } from '@pnpm/fs.msgpack-file' +import { formatIntegrity, parseIntegrity } from '@pnpm/crypto.integrity' import { type CafsFunctions, checkPkgFilesIntegrity, buildFileMapsFromIndex, createCafs, + HASH_ALGORITHM, type PackageFilesIndex, type FilesIndex, optimisticRenameOverwrite, @@ -31,8 +33,6 @@ import { type InitStoreMessage, } from './types.js' -const INTEGRITY_REGEX: RegExp = /^([^-]+)-([a-z0-9+/=]+)$/i - export function startWorker (): void { process.on('uncaughtException', (err) => { console.error(err) @@ -172,19 +172,16 @@ async function handleMessage ( function addTarballToStore ({ buffer, storeDir, integrity, filesIndexFile, appendManifest }: TarballExtractMessage) { if (integrity) { - const [, algo, integrityHash] = integrity.match(INTEGRITY_REGEX)! 
- // Compensate for the possibility of non-uniform Base64 padding - const normalizedRemoteHash: string = Buffer.from(integrityHash, 'base64').toString('hex') - - const calculatedHash: string = crypto.hash(algo, buffer, 'hex') - if (calculatedHash !== normalizedRemoteHash) { + const { algorithm, hexDigest } = parseIntegrity(integrity) + const calculatedHash: string = crypto.hash(algorithm, buffer, 'hex') + if (calculatedHash !== hexDigest) { return { status: 'error', error: { type: 'integrity_validation_failed', - algorithm: algo, + algorithm, expected: integrity, - found: `${algo}-${Buffer.from(calculatedHash, 'hex').toString('base64')}`, + found: formatIntegrity(algorithm, calculatedHash), }, } } @@ -199,7 +196,7 @@ function addTarballToStore ({ buffer, storeDir, integrity, filesIndexFile, appen addManifestToCafs(cafs, filesIndex, appendManifest) } const { filesIntegrity, filesMap } = processFilesIndex(filesIndex) - const requiresBuild = writeFilesIndexFile(filesIndexFile, { manifest: manifest ?? {}, files: filesIntegrity }) + const requiresBuild = writeFilesIndexFile(filesIndexFile, { algo: HASH_ALGORITHM, manifest: manifest ?? 
{}, files: filesIntegrity }) return { status: 'success', value: { @@ -213,7 +210,7 @@ function addTarballToStore ({ buffer, storeDir, integrity, filesIndexFile, appen function calcIntegrity (buffer: Buffer): string { const calculatedHash: string = crypto.hash('sha512', buffer, 'hex') - return `sha512-${Buffer.from(calculatedHash, 'hex').toString('base64')}` + return formatIntegrity('sha512', calculatedHash) } interface AddFilesFromDirResult { @@ -287,6 +284,13 @@ function addFilesFromDir ( if (!existingFilesIndex.sideEffects) { existingFilesIndex.sideEffects = new Map() } + // Ensure side effects use the same algorithm as the original package + if (existingFilesIndex.algo !== HASH_ALGORITHM) { + throw new PnpmError( + 'ALGO_MISMATCH', + `Algorithm mismatch: package index uses "${existingFilesIndex.algo}" but side effects were computed with "${HASH_ALGORITHM}"` + ) + } existingFilesIndex.sideEffects.set(sideEffectsCacheKey, calculateDiff(existingFilesIndex.files, filesIntegrity)) if (existingFilesIndex.requiresBuild == null) { requiresBuild = pkgRequiresBuild(manifest, filesMap) @@ -295,7 +299,7 @@ function addFilesFromDir ( } writeIndexFile(filesIndexFile, existingFilesIndex) } else { - requiresBuild = writeFilesIndexFile(filesIndexFile, { manifest: manifest ?? {}, files: filesIntegrity }) + requiresBuild = writeFilesIndexFile(filesIndexFile, { algo: HASH_ALGORITHM, manifest: manifest ?? {}, files: filesIntegrity }) } return { status: 'success', value: { filesMap, manifest, requiresBuild } } } @@ -319,7 +323,7 @@ function calculateDiff (baseFiles: PackageFiles, sideEffectsFiles: PackageFiles) deleted.push(file) } else if ( !baseFiles.has(file) || - baseFiles.get(file)!.integrity !== sideEffectsFiles.get(file)!.integrity || + baseFiles.get(file)!.digest !== sideEffectsFiles.get(file)!.digest || baseFiles.get(file)!.mode !== sideEffectsFiles.get(file)!.mode ) { added.set(file, sideEffectsFiles.get(file)!) 
@@ -343,10 +347,10 @@ interface ProcessFilesIndexResult { function processFilesIndex (filesIndex: FilesIndex): ProcessFilesIndexResult { const filesIntegrity: PackageFiles = new Map() const filesMap: FilesMap = new Map() - for (const [k, { checkedAt, filePath, integrity, mode, size }] of filesIndex) { + for (const [k, { checkedAt, filePath, digest, mode, size }] of filesIndex) { filesIntegrity.set(k, { checkedAt, - integrity: integrity.toString(), // TODO: use the raw Integrity object + digest, mode, size, }) @@ -403,7 +407,8 @@ function symlinkAllModules (opts: SymlinkAllModulesMessage): { status: 'success' function writeFilesIndexFile ( filesIndexFile: string, - { manifest, files, sideEffects }: { + { algo, manifest, files, sideEffects }: { + algo: string manifest: Partial files: PackageFiles sideEffects?: SideEffects @@ -414,6 +419,7 @@ function writeFilesIndexFile ( name: manifest.name, version: manifest.version, requiresBuild, + algo, files, sideEffects, } diff --git a/worker/tsconfig.json b/worker/tsconfig.json index 91325c787c..e0013f47c3 100644 --- a/worker/tsconfig.json +++ b/worker/tsconfig.json @@ -9,6 +9,9 @@ "../../__typings__/**/*.d.ts" ], "references": [ + { + "path": "../crypto/integrity" + }, { "path": "../exec/pkg-requires-build" },