Files
pnpm/resolving/npm-resolver/src/fetch.ts
Zoltan Kochan 421d120972 perf: use If-Modified-Since for conditional metadata fetches (#11161)
Before fetching package metadata from the registry, stat the local cache
file and send its mtime as an If-Modified-Since header. If the registry
returns 304 Not Modified, read the local cache instead of downloading
the full response body. This saves bandwidth and latency for packages
whose metadata hasn't changed since the last fetch.

Registries that don't support If-Modified-Since simply return 200 as
before, so there is no behavior change for unsupported registries.
2026-04-01 12:39:13 +02:00

175 lines
5.1 KiB
TypeScript

import url from 'node:url'
import { requestRetryLogger } from '@pnpm/core-loggers'
import {
FetchError,
type FetchErrorRequest,
type FetchErrorResponse,
PnpmError,
} from '@pnpm/error'
import type { FetchFromRegistry, RetryTimeoutOptions } from '@pnpm/fetching.types'
import { globalWarn } from '@pnpm/logger'
import type { PackageMeta } from '@pnpm/resolving.registry.types'
import * as retry from '@zkochan/retry'
/**
 * Shape that the value returned by the injected `fetch` function is cast to
 * inside this module. Only the members declared here are relied upon.
 */
interface RegistryResponse {
/** HTTP status code; compared against 304 and `>= 400` by the metadata fetcher. */
status: number
/** HTTP status text accompanying `status`. */
statusText: string
/** Response headers; the metadata fetcher reads the `etag` header through `get`. */
headers: {
get: (name: string) => string | null
}
/** Reads the body and yields it as parsed package metadata. */
json: () => Promise<PackageMeta>
/** Reads the body as raw text; the metadata fetcher parses this text with `JSON.parse`. */
text: () => Promise<string>
}
/**
 * Result of a metadata fetch whose response body was read and parsed.
 */
export interface FetchMetadataResult {
/** Metadata obtained by parsing `jsonText` as JSON. */
meta: PackageMeta
/** Raw response body text that `meta` was parsed from. */
jsonText: string
/** Value of the response's `etag` header, or `undefined` when the header is absent. */
etag?: string
/** Never `true` on this variant; lets callers tell it apart from the not-modified result. */
notModified?: false
}
/**
 * Result of a metadata fetch that the registry answered with HTTP 304.
 * It carries no metadata and no body text.
 */
export interface FetchMetadataNotModifiedResult {
/** Always `true`; discriminates this variant from a result that carries metadata. */
notModified: true
}
// Based on the regular expression suggested by semver.org:
// https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
// Instead of a leading `^` anchor the pattern starts with a `(.*)` capture group,
// so it matches any string that ENDS with a semver version and exposes whatever
// precedes that version as capture group 1.
// eslint-disable-next-line regexp/no-super-linear-backtracking, regexp/use-ignore-case
const semverRegex = /(.*)(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$/
/**
 * Fetch error that additionally records the name of the package whose
 * metadata request failed.
 *
 * For a 404 response the error is given a hint saying the package is missing
 * or not accessible. When the package name ends with a semver version, the
 * hint also suggests the part of the name that precedes the version.
 */
export class RegistryResponseError extends FetchError {
  public readonly pkgName: string

  constructor (
    request: FetchErrorRequest,
    response: FetchErrorResponse,
    pkgName: string
  ) {
    const isNotFound = response.status === 404
    // Only look for a trailing version when a hint is going to be produced.
    const versionedName = isNotFound ? semverRegex.exec(pkgName) : null
    const suggestion = versionedName == null ? '' : ` Did you mean ${versionedName[1]}?`
    const hint = isNotFound
      ? `${pkgName} is not in the npm registry, or you have no permission to fetch it.${suggestion}`
      : undefined
    super(request, response, hint)
    this.pkgName = pkgName
  }
}
/**
 * Transport-level settings shared by every metadata request.
 */
export interface FetchMetadataFromFromRegistryOptions {
/** Function used to perform the HTTP request against the registry. */
fetch: FetchFromRegistry
/** Retry settings; used to create the retry operation and also forwarded to `fetch`. */
retry: RetryTimeoutOptions
/** Forwarded unchanged to `fetch` as its `timeout` option (presumably milliseconds; confirm against the fetch implementation). */
timeout: number
/** A warning is logged when a successful request takes longer than this many milliseconds. */
fetchWarnTimeoutMs: number
}
/**
 * Per-request settings for fetching one package's metadata.
 */
export interface FetchMetadataOptions {
/** Base URL of the registry; the encoded package name is resolved against it. */
registry: string
/** Forwarded to `fetch` and recorded on the request description of a response error. */
authHeaderValue?: string
/** Forwarded to `fetch` as its `fullMetadata` option. */
fullMetadata?: boolean
/** Previously received ETag; forwarded to `fetch` as `ifNoneMatch`. */
etag?: string
/** Date string parsed with `new Date(...)` and forwarded to `fetch` as `ifModifiedSince` in UTC string form. */
modified?: string
}
/**
 * Downloads the metadata document of `pkgName` from `registry`.
 *
 * When `etag` and/or `modified` are supplied they are forwarded to `fetch` as
 * conditional-request validators (`ifNoneMatch` / `ifModifiedSince`). A 304
 * response resolves to `{ notModified: true }` without reading a body.
 * A `modified` value that cannot be parsed as a date is not forwarded, so the
 * literal text "Invalid Date" is never sent as a header value.
 *
 * Only failures while reading or parsing the response body are retried here,
 * following `fetchOpts.retry`.
 *
 * @param fetchOpts - Transport settings: fetch function, retry policy, timeout and slow-request threshold.
 * @param pkgName - Name of the package whose metadata is requested.
 * @returns The parsed metadata together with its raw text and ETag, or a not-modified marker.
 * @throws PnpmError with code `META_FETCH_FAIL` when `fetch` itself throws.
 * @throws RegistryResponseError when the response status is 400 or higher.
 * @throws The retry operation's main error (`BROKEN_METADATA_JSON`) once body retries are exhausted.
 */
export async function fetchMetadataFromFromRegistry (
  fetchOpts: FetchMetadataFromFromRegistryOptions,
  pkgName: string,
  {
    authHeaderValue,
    etag: cachedEtag,
    fullMetadata,
    modified: cachedModified,
    registry,
  }: FetchMetadataOptions
): Promise<FetchMetadataResult | FetchMetadataNotModifiedResult> {
  const uri = toUri(pkgName, registry)
  // The header value does not change between attempts, so compute it once.
  // `toUTCString()` returns the string "Invalid Date" for an unparseable input;
  // in that case the validator is omitted rather than sent in a malformed form.
  let ifModifiedSince: string | undefined
  if (cachedModified) {
    const modifiedAt = new Date(cachedModified)
    if (!Number.isNaN(modifiedAt.getTime())) {
      ifModifiedSince = modifiedAt.toUTCString()
    }
  }
  const op = retry.operation(fetchOpts.retry)
  return new Promise((resolve, reject) => {
    op.attempt(async (attempt) => {
      let response: RegistryResponse
      const startTime = Date.now()
      try {
        response = await fetchOpts.fetch(uri, {
          authHeaderValue,
          compress: true,
          fullMetadata,
          ifNoneMatch: cachedEtag,
          ifModifiedSince,
          retry: fetchOpts.retry,
          timeout: fetchOpts.timeout,
        }) as RegistryResponse
      } catch (error: any) { // eslint-disable-line
        reject(new PnpmError('META_FETCH_FAIL', `GET ${uri}: ${error.message as string}`, { attempts: attempt, cause: error }))
        return
      }
      // The registry confirmed the cached copy is still current; there is no body to read.
      if (response.status === 304) {
        resolve({ notModified: true })
        return
      }
      if (response.status >= 400) {
        const request = {
          authHeaderValue,
          url: uri,
        }
        reject(new RegistryResponseError(request, response, pkgName))
        return
      }
      // Here we only retry broken JSON responses.
      // Other HTTP issues are retried by the @pnpm/network.fetch library
      try {
        const jsonText = await response.text()
        const meta = JSON.parse(jsonText) as PackageMeta
        // Check if request took longer than expected
        const elapsedMs = Date.now() - startTime
        if (elapsedMs > fetchOpts.fetchWarnTimeoutMs) {
          globalWarn(`Request took ${elapsedMs}ms: ${uri}`)
        }
        resolve({
          meta,
          jsonText,
          etag: response.headers.get('etag') ?? undefined,
        })
      } catch (error: any) { // eslint-disable-line
        // `op.retry` schedules another attempt and returns its delay,
        // or returns `false` when no retries are left.
        const timeout = op.retry(
          new PnpmError('BROKEN_METADATA_JSON', error.message)
        )
        if (timeout === false) {
          reject(op.mainError())
          return
        }
        // Extract error properties into a plain object because Error properties
        // are non-enumerable and don't serialize well through the logging system
        const errorInfo = {
          name: error.name,
          message: error.message,
          code: error.code,
          errno: error.errno,
        }
        requestRetryLogger.debug({
          attempt,
          error: errorInfo,
          maxRetries: fetchOpts.retry.retries!,
          method: 'GET',
          timeout,
          url: uri,
        })
      }
    })
  })
}
/**
 * Builds the absolute metadata URL of a package on a registry.
 *
 * The package name is percent-encoded. For a scoped name the leading `@` is
 * kept literal and only the remainder is encoded, so the `/` separating scope
 * and name becomes `%2F`. A trailing slash is appended to `registry` when it
 * is missing, so the name is resolved beneath the registry path.
 */
function toUri (pkgName: string, registry: string): string {
  const isScoped = pkgName.startsWith('@')
  const encodedName = isScoped
    ? `@${encodeURIComponent(pkgName.slice(1))}`
    : encodeURIComponent(pkgName)
  const base = registry.endsWith('/') ? registry : `${registry}/`
  return new url.URL(encodedName, base).toString()
}