mirror of
https://github.com/pnpm/pnpm.git
synced 2026-05-19 06:07:59 -04:00
fix: skip Content-Length check when response is content-encoded (#11508)
* fix: skip Content-Length check when response is content-encoded Per the HTTP spec, Content-Length refers to the encoded payload when Content-Encoding is set, but undici fetch yields decoded bytes — so the strict size check rejected legitimate downloads with ERR_PNPM_BAD_TARBALL_SIZE. Closes #11506. * fix(tarball-fetcher): match v10's no-compression behavior Verified the user's report against v10 source: v10 worked because it called node-fetch with `compress: false` (network/fetch/src/fetchFromRegistry.ts on the v10 branch), which suppressed Accept-Encoding and prevented auto-decompression. v11's switch to undici fetch lost that opt-out — undici sends Accept-Encoding: gzip, deflate, br by default and transparently decodes the body, while keeping Content-Length pointed at the encoded payload (confirmed empirically). The strict size check then rejects the download. Restore v10's effective behavior by sending Accept-Encoding: identity for tarball requests, and as defense in depth against misbehaving servers that stamp Content-Encoding regardless, skip the strict size check when the response declares a non-identity Content-Encoding. * fix(tarball-fetcher): parse Content-Encoding as a coding list Per RFC 9110 §8.4 the header is a comma-separated, case-insensitive list that may include whitespace and mixed codings (e.g. `gzip, identity`). The previous string-equality check misclassified those — the response is now treated as encoded iff any coding is non-`identity`.
This commit is contained in:
6
.changeset/tarball-content-encoding.md
Normal file
6
.changeset/tarball-content-encoding.md
Normal file
@@ -0,0 +1,6 @@
|
||||
---
|
||||
"@pnpm/fetching.tarball-fetcher": patch
|
||||
"pnpm": patch
|
||||
---
|
||||
|
||||
Fixed `ERR_PNPM_BAD_TARBALL_SIZE` when a registry serves tarballs with an end-to-end `Content-Encoding` (e.g. `gzip`). Tarballs are already compressed, so the fetcher now requests them with `Accept-Encoding: identity` (matching pnpm v10's effective behavior) and, as defense in depth against misbehaving servers, no longer enforces the strict `Content-Length` check when the response declares a `Content-Encoding` — `Content-Length` in that case refers to the encoded payload, not the decoded bytes the fetch implementation yields [#11506](https://github.com/pnpm/pnpm/issues/11506).
|
||||
@@ -120,6 +120,10 @@ export function createDownloader (
|
||||
try {
|
||||
const res = await fetchFromRegistry(url, {
|
||||
authHeaderValue,
|
||||
// Tarballs are already compressed; ask the server not to apply an additional
|
||||
// Content-Encoding so Content-Length matches the body we receive and we don't
|
||||
// waste CPU on round-trip re-compression. See https://github.com/pnpm/pnpm/issues/11506
|
||||
headers: { 'accept-encoding': 'identity' },
|
||||
// The fetch library can retry requests on bad HTTP responses.
|
||||
// However, it is not enough to retry on bad HTTP responses only.
|
||||
// Requests should also be retried when the tarball's integrity check fails.
|
||||
@@ -133,7 +137,11 @@ export function createDownloader (
|
||||
throw new FetchError({ url, authHeaderValue }, res)
|
||||
}
|
||||
|
||||
const contentLength = res.headers.has('content-length') && res.headers.get('content-length')
|
||||
// When Content-Encoding is present, Content-Length refers to the encoded form
|
||||
// of the data, not the decoded bytes that the fetch implementation yields.
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Content-Encoding
|
||||
const isEncoded = isContentEncoded(res.headers.get('content-encoding'))
|
||||
const contentLength = !isEncoded && res.headers.has('content-length') && res.headers.get('content-length')
|
||||
const parsedLength = typeof contentLength === 'string' ? parseInt(contentLength, 10) : NaN
|
||||
const size = Number.isFinite(parsedLength) && parsedLength >= 0 ? parsedLength : null
|
||||
if (opts.onStart != null) {
|
||||
@@ -213,3 +221,14 @@ export function createDownloader (
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Per RFC 9110 §8.4, Content-Encoding is a comma-separated list of codings.
|
||||
// The response is encoded unless every coding is `identity` (case-insensitive,
|
||||
// surrounding whitespace ignored).
|
||||
function isContentEncoded (header: string | null): boolean {
|
||||
if (header == null) return false
|
||||
return header
|
||||
.split(',')
|
||||
.map(coding => coding.trim().toLowerCase())
|
||||
.some(coding => coding !== '' && coding !== 'identity')
|
||||
}
|
||||
|
||||
@@ -116,6 +116,100 @@ test('fail when tarball size does not match content-length', async () => {
|
||||
)
|
||||
})
|
||||
|
||||
test('request tarballs with Accept-Encoding: identity', async () => {
|
||||
// Tarballs are already gzipped. Asking the server for an additional Content-Encoding
|
||||
// wastes CPU and (when servers misbehave per RFC 7230 §3.3.2) breaks the size check
|
||||
// because Content-Length refers to the encoded payload but undici yields decoded bytes.
|
||||
// See: https://github.com/pnpm/pnpm/issues/11506
|
||||
const tarballContent = fs.readFileSync(tarballPath)
|
||||
const mockPool = mockAgent.get(registry)
|
||||
|
||||
let observedAcceptEncoding: string | string[] | undefined
|
||||
mockPool.intercept({ path: '/foo.tgz', method: 'GET' }).reply(({ headers }) => {
|
||||
observedAcceptEncoding = (headers as Record<string, string | string[]>)['accept-encoding']
|
||||
return { statusCode: 200, data: tarballContent }
|
||||
})
|
||||
|
||||
process.chdir(temporaryDirectory())
|
||||
|
||||
const resolution = { tarball: `${registry}/foo.tgz` }
|
||||
|
||||
await fetch.remoteTarball(cafs, resolution, {
|
||||
filesIndexFile,
|
||||
lockfileDir: process.cwd(),
|
||||
pkg,
|
||||
})
|
||||
|
||||
expect(observedAcceptEncoding).toBe('identity')
|
||||
})
|
||||
|
||||
test.each([
|
||||
['gzip'],
|
||||
['GZIP'],
|
||||
['identity, gzip'],
|
||||
['gzip, identity'],
|
||||
[' gzip '],
|
||||
])("don't fail on content-length mismatch when Content-Encoding is %j", async (contentEncoding) => {
|
||||
// Defense in depth: a misbehaving server may stamp Content-Encoding on the response
|
||||
// even when the client sent Accept-Encoding: identity. In that case undici decodes
|
||||
// the body and Content-Length (encoded form) no longer matches the bytes we read.
|
||||
// Per RFC 9110 §8.4, Content-Encoding is a comma-separated, case-insensitive list,
|
||||
// so the parser must handle whitespace, casing, and mixed codings.
|
||||
const tarballContent = fs.readFileSync(tarballPath)
|
||||
const mockPool = mockAgent.get(registry)
|
||||
|
||||
mockPool.intercept({ path: '/foo.tgz', method: 'GET' }).reply(200, tarballContent, {
|
||||
headers: {
|
||||
'Content-Length': (tarballSize + 100).toString(),
|
||||
'Content-Encoding': contentEncoding,
|
||||
},
|
||||
})
|
||||
|
||||
process.chdir(temporaryDirectory())
|
||||
|
||||
const resolution = { tarball: `${registry}/foo.tgz` }
|
||||
|
||||
const result = await fetch.remoteTarball(cafs, resolution, {
|
||||
filesIndexFile,
|
||||
lockfileDir: process.cwd(),
|
||||
pkg,
|
||||
})
|
||||
|
||||
expect(result.filesMap).toBeTruthy()
|
||||
})
|
||||
|
||||
test('still enforce content-length when Content-Encoding is identity', async () => {
|
||||
// identity (alone, with whitespace, or with mixed casing) means "no transformation";
|
||||
// Content-Length still reflects the bytes on the wire and the strict check applies.
|
||||
const tarballContent = fs.readFileSync(tarballPath)
|
||||
const mockPool = mockAgent.get(registry)
|
||||
|
||||
mockPool.intercept({ path: '/foo.tgz', method: 'GET' }).reply(200, tarballContent, {
|
||||
headers: {
|
||||
'Content-Length': (1024 * 1024).toString(),
|
||||
'Content-Encoding': ' Identity ',
|
||||
},
|
||||
})
|
||||
mockPool.intercept({ path: '/foo.tgz', method: 'GET' }).reply(200, tarballContent, {
|
||||
headers: {
|
||||
'Content-Length': (1024 * 1024).toString(),
|
||||
'Content-Encoding': ' Identity ',
|
||||
},
|
||||
})
|
||||
|
||||
process.chdir(temporaryDirectory())
|
||||
|
||||
const resolution = { tarball: `${registry}/foo.tgz` }
|
||||
|
||||
await expect(
|
||||
fetch.remoteTarball(cafs, resolution, {
|
||||
filesIndexFile,
|
||||
lockfileDir: process.cwd(),
|
||||
pkg,
|
||||
})
|
||||
).rejects.toThrow(BadTarballError)
|
||||
})
|
||||
|
||||
test('retry when tarball size does not match content-length', async () => {
|
||||
const tarballContent = fs.readFileSync(tarballPath)
|
||||
const mockPool = mockAgent.get(registry)
|
||||
|
||||
Reference in New Issue
Block a user