perf: use abbreviated metadata for minimumReleaseAge (#11160)

* perf: use abbreviated metadata for minimumReleaseAge when possible

Instead of always fetching full package metadata when minimumReleaseAge
is set, fetch the smaller abbreviated document first and check the
top-level `modified` field. If the package was last modified before the
release age cutoff, all versions are mature and no per-version time
filtering is needed. Only re-fetch full metadata for the rare case of
recently-modified packages.

Also uses fs.stat() to check cache file mtime instead of reading and
parsing the JSON to check cachedAt, avoiding unnecessary I/O.

* fix: validate modified date and handle abbreviated metadata edge cases

- Validate meta.modified date to prevent invalid dates from bypassing
  minimumReleaseAge filtering
- Skip full metadata refetch for packages excluded by publishedByExclude
- Allow ERR_PNPM_MISSING_TIME from cached abbreviated metadata to fall
  through to the network fetch path instead of throwing

* fix: cache abbreviated metadata before re-fetching full metadata

Save the abbreviated metadata to disk before re-fetching full metadata
so subsequent runs benefit from the mtime cache fast-path.

* fix: resolve type narrowing for conditional metadata fetch result
This commit is contained in:
Zoltan Kochan
2026-04-01 14:47:31 +02:00
committed by GitHub
parent 6c57d746bb
commit 968724fc0b
5 changed files with 194 additions and 34 deletions

View File

@@ -97,7 +97,6 @@ export async function handler (
const fullMetadata = (
(
opts.resolutionMode === 'time-based' ||
Boolean(opts.minimumReleaseAge) ||
opts.trustPolicy === 'no-downgrade'
) && !opts.registrySupportsTimeField
)

View File

@@ -194,19 +194,27 @@ export async function pickPackage (
}
}
if (opts.publishedBy) {
metaCachedInStore = metaCachedInStore ?? await limit(async () => loadMeta(pkgMirror))
if (metaCachedInStore?.cachedAt && new Date(metaCachedInStore.cachedAt) >= opts.publishedBy) {
try {
const pickedPackage = _pickPackageFromMeta(metaCachedInStore)
if (pickedPackage) {
return {
meta: metaCachedInStore,
pickedPackage,
const mtime = await limit(async () => getFileMtime(pkgMirror))
if (mtime != null && mtime >= opts.publishedBy) {
metaCachedInStore = metaCachedInStore ?? await limit(async () => loadMeta(pkgMirror))
if (metaCachedInStore != null) {
try {
const pickedPackage = _pickPackageFromMeta(metaCachedInStore)
if (pickedPackage) {
return {
meta: metaCachedInStore,
pickedPackage,
}
}
} catch (err: unknown) {
// Don't rethrow ERR_PNPM_MISSING_TIME from cached abbreviated metadata —
// let the code fall through to the network fetch path which will get full metadata.
if (
ctx.strictPublishedByCheck &&
!(isMissingTimeError(err))
) {
throw err
}
}
} catch (err) {
if (ctx.strictPublishedByCheck) {
throw err
}
}
}
@@ -215,7 +223,7 @@ export async function pickPackage (
try {
// Load cached metadata to get conditional request headers (ETag, Last-Modified)
metaCachedInStore = metaCachedInStore ?? await limit(async () => loadMeta(pkgMirror))
const fetchResult = await ctx.fetch(spec.name, {
let fetchResult = await ctx.fetch(spec.name, {
authHeaderValue: opts.authHeaderValue,
fullMetadata,
etag: metaCachedInStore?.etag,
@@ -239,28 +247,53 @@ export async function pickPackage (
const cachedAt = Date.now()
let meta = fetchResult.meta
let jsonToSave: string | undefined
if (ctx.filterMetadata) {
meta = clearMeta(meta)
} else if (typeof fetchResult.jsonText === 'string') {
// Reuse the raw JSON text from the registry response to avoid re-stringifying.
// Inject cachedAt and cache validation headers at the start of the JSON object.
const jsonText = fetchResult.jsonText
const firstBraceIndex = jsonText.indexOf('{')
if (firstBraceIndex !== -1) {
let injectedFields = `"cachedAt":${cachedAt}`
if (fetchResult.etag) {
injectedFields += `,"etag":${JSON.stringify(fetchResult.etag)}`
let resultToSave: FetchMetadataResult = fetchResult
// When minimumReleaseAge is active and we fetched abbreviated metadata,
// check if the package was recently modified and needs full metadata
// for per-version time-based filtering.
if (
opts.publishedBy &&
!fullMetadata &&
meta.time == null &&
opts.publishedByExclude?.(spec.name) !== true
) {
const modifiedDate = meta.modified ? new Date(meta.modified) : null
const isModifiedValid = modifiedDate != null && !Number.isNaN(modifiedDate.getTime())
if (!isModifiedValid || modifiedDate >= opts.publishedBy) {
// Save the abbreviated metadata to the abbreviated cache before re-fetching full.
if (!opts.dryRun) {
const abbreviatedJson = prepareJsonForDisk(fetchResult, cachedAt)
// Fire-and-forget save to the abbreviated cache path (pkgMirror).
runLimited(pkgMirror, (limit) => limit(async () => {
try {
await saveMeta(pkgMirror, abbreviatedJson)
} catch (err: any) { // eslint-disable-line
// We don't care if this file was not written to the cache
}
}))
}
const fullFetchResult = await ctx.fetch(spec.name, {
authHeaderValue: opts.authHeaderValue,
fullMetadata: true,
registry: opts.registry,
})
if (!fullFetchResult.notModified) {
resultToSave = fullFetchResult
meta = fullFetchResult.meta
}
jsonToSave = `{${injectedFields},${jsonText.slice(firstBraceIndex + 1)}`
}
}
if (ctx.filterMetadata) {
meta = clearMeta(meta)
}
meta.cachedAt = cachedAt
meta.etag = fetchResult.etag
meta.etag = resultToSave.etag
// only save meta to cache, when it is fresh
ctx.metaCache.set(cacheKey, meta)
if (!opts.dryRun) {
const jsonForDisk = jsonToSave ?? JSON.stringify(meta)
const jsonForDisk = ctx.filterMetadata ? JSON.stringify(meta) : prepareJsonForDisk(resultToSave, cachedAt)
runLimited(pkgMirror, (limit) => limit(async () => {
try {
await saveMeta(pkgMirror, jsonForDisk)
@@ -333,6 +366,38 @@ function encodePkgName (pkgName: string): string {
return pkgName
}
function prepareJsonForDisk (fetchResult: FetchMetadataResult, cachedAt: number): string {
if (typeof fetchResult.jsonText === 'string') {
const firstBraceIndex = fetchResult.jsonText.indexOf('{')
if (firstBraceIndex !== -1) {
let injectedFields = `"cachedAt":${cachedAt}`
if (fetchResult.etag) {
injectedFields += `,"etag":${JSON.stringify(fetchResult.etag)}`
}
return `{${injectedFields},${fetchResult.jsonText.slice(firstBraceIndex + 1)}`
}
}
return JSON.stringify({ ...fetchResult.meta, cachedAt, etag: fetchResult.etag })
}
function isMissingTimeError (err: unknown): boolean {
return (
err != null &&
typeof err === 'object' &&
'code' in err &&
(err as { code: string }).code === 'ERR_PNPM_MISSING_TIME'
)
}
async function getFileMtime (filePath: string): Promise<Date | null> {
try {
const stat = await fs.stat(filePath)
return stat.mtime
} catch {
return null
}
}
async function loadMeta (pkgMirror: string): Promise<PackageMeta | null> {
try {
const data = await gfs.readFile(pkgMirror, 'utf8')

View File

@@ -37,9 +37,21 @@ export function pickPackageFromMeta (
if (publishedBy) {
const excludeResult = publishedByExclude?.(meta.name) ?? false
if (excludeResult !== true) {
assertMetaHasTime(meta)
const trustedVersions = Array.isArray(excludeResult) ? excludeResult : undefined
meta = filterPkgMetadataByPublishDate(meta, publishedBy, trustedVersions)
if (meta.time != null) {
// Full metadata with per-version timestamps: filter normally
assertMetaHasTime(meta)
const trustedVersions = Array.isArray(excludeResult) ? excludeResult : undefined
meta = filterPkgMetadataByPublishDate(meta, publishedBy, trustedVersions)
} else {
const modifiedDate = parseModifiedDate(meta.modified)
if (modifiedDate == null || modifiedDate >= publishedBy) {
// Abbreviated metadata without per-version timestamps, and the package
// was recently modified (or has no/invalid modified field). We cannot determine
// which individual versions are mature enough — need full metadata.
assertMetaHasTime(meta)
}
// else: meta.modified < publishedBy — all versions are old enough, no filtering needed
}
}
}
if ((!meta.versions || Object.keys(meta.versions).length === 0) && !publishedBy) {
@@ -101,6 +113,13 @@ export function assertMetaHasTime (meta: PackageMeta): asserts meta is PackageMe
}
}
function parseModifiedDate (modified: string | undefined): Date | null {
if (!modified) return null
const date = new Date(modified)
if (Number.isNaN(date.getTime())) return null
return date
}
const semverRangeCache = new Map<string, semver.Range | null>()
// This is a performance optimization; working with string-ish semver

View File

@@ -1,7 +1,7 @@
import fs from 'node:fs'
import path from 'node:path'
import { FULL_FILTERED_META_DIR } from '@pnpm/constants'
import { ABBREVIATED_META_DIR, FULL_FILTERED_META_DIR } from '@pnpm/constants'
import { createFetchFromRegistry } from '@pnpm/network.fetch'
import { createNpmResolver } from '@pnpm/resolving.npm-resolver'
import { fixtures } from '@pnpm/test-fixtures'
@@ -20,6 +20,7 @@ const registries: Registries = {
/* eslint-disable @typescript-eslint/no-explicit-any */
const badDatesMeta = loadJsonFileSync<any>(f.find('bad-dates.json'))
const isPositiveMeta = loadJsonFileSync<any>(f.find('is-positive-full.json'))
const isPositiveAbbreviatedMeta = loadJsonFileSync<any>(f.find('is-positive.json'))
/* eslint-enable @typescript-eslint/no-explicit-any */
const fetch = createFetchFromRegistry({})
@@ -152,3 +153,80 @@ test('should skip time field validation for excluded packages', async () => {
expect(resolveResult!.resolvedVia).toBe('npm-registry')
expect(resolveResult!.manifest.version).toBe('3.1.0')
})
test('use abbreviated metadata when modified date is older than publishedBy', async () => {
// is-positive abbreviated has modified: "2017-08-17T19:26:00.508Z"
// publishedBy is set to 2018, so modified < publishedBy → all versions are old enough
getMockAgent().get(registries.default.replace(/\/$/, ''))
.intercept({ path: '/is-positive', method: 'GET' })
.reply(200, isPositiveAbbreviatedMeta)
const cacheDir = temporaryDirectory()
const { resolveFromNpm } = createResolveFromNpm({
storeDir: temporaryDirectory(),
cacheDir,
registries,
})
const resolveResult = await resolveFromNpm({ alias: 'is-positive', bareSpecifier: '^3.0.0' }, {
publishedBy: new Date('2018-01-01T00:00:00.000Z'),
})
expect(resolveResult!.resolvedVia).toBe('npm-registry')
expect(resolveResult!.id).toBe('is-positive@3.1.0')
})
test('re-fetch full metadata when abbreviated modified date is recent', async () => {
// Abbreviated has modified in the future relative to publishedBy → needs full metadata
const recentAbbreviated = {
...isPositiveAbbreviatedMeta,
modified: '2015-06-10T00:00:00.000Z',
}
const agent = getMockAgent().get(registries.default.replace(/\/$/, ''))
// First request: abbreviated
agent.intercept({ path: '/is-positive', method: 'GET' })
.reply(200, recentAbbreviated)
// Second request: full metadata (re-fetch)
agent.intercept({ path: '/is-positive', method: 'GET' })
.reply(200, isPositiveMeta)
const cacheDir = temporaryDirectory()
const { resolveFromNpm } = createResolveFromNpm({
storeDir: temporaryDirectory(),
cacheDir,
registries,
})
// publishedBy is 2015-06-05, modified is 2015-06-10 → modified >= publishedBy → needs full
const resolveResult = await resolveFromNpm({ alias: 'is-positive', bareSpecifier: '^1.0.0' }, {
publishedBy: new Date('2015-06-05T00:00:00.000Z'),
})
expect(resolveResult!.resolvedVia).toBe('npm-registry')
// 1.0.0 was published 2015-06-02, which is before publishedBy (2015-06-05)
expect(resolveResult!.id).toBe('is-positive@1.0.0')
})
test('use cached metadata based on file mtime when publishedBy is set', async () => {
const cacheDir = temporaryDirectory()
// Write abbreviated metadata to the abbreviated cache dir
const cacheDir2 = path.join(cacheDir, `${ABBREVIATED_META_DIR}/registry.npmjs.org`)
fs.mkdirSync(cacheDir2, { recursive: true })
const cachePath = path.join(cacheDir2, 'is-positive.json')
fs.writeFileSync(cachePath, JSON.stringify(isPositiveAbbreviatedMeta), 'utf8')
// No mock agent intercepts — the test verifies no network request is made.
// If a request were attempted, it would fail.
const { resolveFromNpm } = createResolveFromNpm({
storeDir: temporaryDirectory(),
cacheDir,
registries,
})
// publishedBy in the past relative to file mtime (file was just written = now)
const resolveResult = await resolveFromNpm({ alias: 'is-positive', bareSpecifier: '^3.0.0' }, {
publishedBy: new Date('2020-01-01T00:00:00.000Z'),
})
expect(resolveResult!.resolvedVia).toBe('npm-registry')
expect(resolveResult!.id).toBe('is-positive@3.1.0')
})

View File

@@ -62,7 +62,6 @@ export async function createNewStoreController (
const fullMetadata = opts.fetchFullMetadata ?? (
(
opts.resolutionMode === 'time-based' ||
Boolean(opts.minimumReleaseAge) ||
opts.trustPolicy === 'no-downgrade'
) && !opts.registrySupportsTimeField
)