From 968724fc0b49bf2ed06b0255b6290c250b30b89d Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Wed, 1 Apr 2026 14:47:31 +0200 Subject: [PATCH] perf: use abbreviated metadata for minimumReleaseAge (#11160) * perf: use abbreviated metadata for minimumReleaseAge when possible Instead of always fetching full package metadata when minimumReleaseAge is set, fetch the smaller abbreviated document first and check the top-level `modified` field. If the package was last modified before the release age cutoff, all versions are mature and no per-version time filtering is needed. Only re-fetch full metadata for the rare case of recently-modified packages. Also uses fs.stat() to check cache file mtime instead of reading and parsing the JSON to check cachedAt, avoiding unnecessary I/O. * fix: validate modified date and handle abbreviated metadata edge cases - Validate meta.modified date to prevent invalid dates from bypassing minimumReleaseAge filtering - Skip full metadata refetch for packages excluded by publishedByExclude - Allow ERR_PNPM_MISSING_TIME from cached abbreviated metadata to fall through to the network fetch path instead of throwing * fix: cache abbreviated metadata before re-fetching full metadata Save the abbreviated metadata to disk before re-fetching full metadata so subsequent runs benefit from the mtime cache fast-path. * fix: resolve type narrowing for conditional metadata fetch result --- exec/commands/src/dlx.ts | 1 - resolving/npm-resolver/src/pickPackage.ts | 121 ++++++++++++++---- .../npm-resolver/src/pickPackageFromMeta.ts | 25 +++- .../npm-resolver/test/publishedBy.test.ts | 80 +++++++++++- .../src/createNewStoreController.ts | 1 - 5 files changed, 194 insertions(+), 34 deletions(-) diff --git a/exec/commands/src/dlx.ts b/exec/commands/src/dlx.ts index 96d8b6b43c..f79e16873b 100644 --- a/exec/commands/src/dlx.ts +++ b/exec/commands/src/dlx.ts @@ -97,7 +97,6 @@ export async function handler ( const fullMetadata = ( ( opts.resolutionMode === 'time-based' || - Boolean(opts.minimumReleaseAge) || opts.trustPolicy === 'no-downgrade' ) && !opts.registrySupportsTimeField ) diff --git a/resolving/npm-resolver/src/pickPackage.ts b/resolving/npm-resolver/src/pickPackage.ts index 919269f406..e34126b129 100644 --- a/resolving/npm-resolver/src/pickPackage.ts +++ b/resolving/npm-resolver/src/pickPackage.ts @@ -194,19 +194,27 @@ export async function pickPackage ( } } if (opts.publishedBy) { - metaCachedInStore = metaCachedInStore ?? await limit(async () => loadMeta(pkgMirror)) - if (metaCachedInStore?.cachedAt && new Date(metaCachedInStore.cachedAt) >= opts.publishedBy) { - try { - const pickedPackage = _pickPackageFromMeta(metaCachedInStore) - if (pickedPackage) { - return { - meta: metaCachedInStore, - pickedPackage, + const mtime = await limit(async () => getFileMtime(pkgMirror)) + if (mtime != null && mtime >= opts.publishedBy) { + metaCachedInStore = metaCachedInStore ?? await limit(async () => loadMeta(pkgMirror)) + if (metaCachedInStore != null) { + try { + const pickedPackage = _pickPackageFromMeta(metaCachedInStore) + if (pickedPackage) { + return { + meta: metaCachedInStore, + pickedPackage, + } + } + } catch (err: unknown) { + // Don't rethrow ERR_PNPM_MISSING_TIME from cached abbreviated metadata — + // let the code fall through to the network fetch path which will get full metadata. + if ( + ctx.strictPublishedByCheck && + !(isMissingTimeError(err)) + ) { + throw err } - } - } catch (err) { - if (ctx.strictPublishedByCheck) { - throw err } } } @@ -215,7 +223,7 @@ export async function pickPackage ( try { // Load cached metadata to get conditional request headers (ETag, Last-Modified) metaCachedInStore = metaCachedInStore ?? await limit(async () => loadMeta(pkgMirror)) - const fetchResult = await ctx.fetch(spec.name, { + let fetchResult = await ctx.fetch(spec.name, { authHeaderValue: opts.authHeaderValue, fullMetadata, etag: metaCachedInStore?.etag, @@ -239,28 +247,53 @@ export async function pickPackage ( const cachedAt = Date.now() let meta = fetchResult.meta - let jsonToSave: string | undefined - if (ctx.filterMetadata) { - meta = clearMeta(meta) - } else if (typeof fetchResult.jsonText === 'string') { - // Reuse the raw JSON text from the registry response to avoid re-stringifying. - // Inject cachedAt and cache validation headers at the start of the JSON object. - const jsonText = fetchResult.jsonText - const firstBraceIndex = jsonText.indexOf('{') - if (firstBraceIndex !== -1) { - let injectedFields = `"cachedAt":${cachedAt}` - if (fetchResult.etag) { - injectedFields += `,"etag":${JSON.stringify(fetchResult.etag)}` + let resultToSave: FetchMetadataResult = fetchResult + + // When minimumReleaseAge is active and we fetched abbreviated metadata, + // check if the package was recently modified and needs full metadata + // for per-version time-based filtering. + if ( + opts.publishedBy && + !fullMetadata && + meta.time == null && + opts.publishedByExclude?.(spec.name) !== true + ) { + const modifiedDate = meta.modified ? new Date(meta.modified) : null + const isModifiedValid = modifiedDate != null && !Number.isNaN(modifiedDate.getTime()) + if (!isModifiedValid || modifiedDate >= opts.publishedBy) { + // Save the abbreviated metadata to the abbreviated cache before re-fetching full. + if (!opts.dryRun) { + const abbreviatedJson = prepareJsonForDisk(fetchResult, cachedAt) + // Fire-and-forget save to the abbreviated cache path (pkgMirror). + runLimited(pkgMirror, (limit) => limit(async () => { + try { + await saveMeta(pkgMirror, abbreviatedJson) + } catch (err: any) { // eslint-disable-line + // We don't care if this file was not written to the cache + } + })) + } + const fullFetchResult = await ctx.fetch(spec.name, { + authHeaderValue: opts.authHeaderValue, + fullMetadata: true, + registry: opts.registry, + }) + if (!fullFetchResult.notModified) { + resultToSave = fullFetchResult + meta = fullFetchResult.meta } - jsonToSave = `{${injectedFields},${jsonText.slice(firstBraceIndex + 1)}` } } + + if (ctx.filterMetadata) { + meta = clearMeta(meta) + } meta.cachedAt = cachedAt - meta.etag = fetchResult.etag + meta.etag = resultToSave.etag // only save meta to cache, when it is fresh ctx.metaCache.set(cacheKey, meta) if (!opts.dryRun) { - const jsonForDisk = jsonToSave ?? JSON.stringify(meta) + const jsonForDisk = ctx.filterMetadata ? JSON.stringify(meta) : prepareJsonForDisk(resultToSave, cachedAt) runLimited(pkgMirror, (limit) => limit(async () => { try { await saveMeta(pkgMirror, jsonForDisk) @@ -333,6 +366,38 @@ function encodePkgName (pkgName: string): string { return pkgName } +function prepareJsonForDisk (fetchResult: FetchMetadataResult, cachedAt: number): string { + if (typeof fetchResult.jsonText === 'string') { + const firstBraceIndex = fetchResult.jsonText.indexOf('{') + if (firstBraceIndex !== -1) { + let injectedFields = `"cachedAt":${cachedAt}` + if (fetchResult.etag) { + injectedFields += `,"etag":${JSON.stringify(fetchResult.etag)}` + } + return `{${injectedFields},${fetchResult.jsonText.slice(firstBraceIndex + 1)}` + } + } + return JSON.stringify({ ...fetchResult.meta, cachedAt, etag: fetchResult.etag }) +} + +function isMissingTimeError (err: unknown): boolean { + return ( + err != null && + typeof err === 'object' && + 'code' in err && + (err as { code: string }).code === 'ERR_PNPM_MISSING_TIME' + ) +} + +async function getFileMtime (filePath: string): Promise { + try { + const stat = await fs.stat(filePath) + return stat.mtime + } catch { + return null + } +} + async function loadMeta (pkgMirror: string): Promise { try { const data = await gfs.readFile(pkgMirror, 'utf8') diff --git a/resolving/npm-resolver/src/pickPackageFromMeta.ts b/resolving/npm-resolver/src/pickPackageFromMeta.ts index 1081ad153f..4bef57e996 100644 --- a/resolving/npm-resolver/src/pickPackageFromMeta.ts +++ b/resolving/npm-resolver/src/pickPackageFromMeta.ts @@ -37,9 +37,21 @@ export function pickPackageFromMeta ( if (publishedBy) { const excludeResult = publishedByExclude?.(meta.name) ?? false if (excludeResult !== true) { - assertMetaHasTime(meta) - const trustedVersions = Array.isArray(excludeResult) ? excludeResult : undefined - meta = filterPkgMetadataByPublishDate(meta, publishedBy, trustedVersions) + if (meta.time != null) { + // Full metadata with per-version timestamps: filter normally + assertMetaHasTime(meta) + const trustedVersions = Array.isArray(excludeResult) ? excludeResult : undefined + meta = filterPkgMetadataByPublishDate(meta, publishedBy, trustedVersions) + } else { + const modifiedDate = parseModifiedDate(meta.modified) + if (modifiedDate == null || modifiedDate >= publishedBy) { + // Abbreviated metadata without per-version timestamps, and the package + // was recently modified (or has no/invalid modified field). We cannot determine + // which individual versions are mature enough — need full metadata. + assertMetaHasTime(meta) + } + // else: meta.modified < publishedBy — all versions are old enough, no filtering needed + } } } if ((!meta.versions || Object.keys(meta.versions).length === 0) && !publishedBy) { @@ -101,6 +113,13 @@ export function assertMetaHasTime (meta: PackageMeta): asserts meta is PackageMe } } +function parseModifiedDate (modified: string | undefined): Date | null { + if (!modified) return null + const date = new Date(modified) + if (Number.isNaN(date.getTime())) return null + return date +} + const semverRangeCache = new Map() // This is a performance optimization; working with string-ish semver diff --git a/resolving/npm-resolver/test/publishedBy.test.ts b/resolving/npm-resolver/test/publishedBy.test.ts index aa70867bed..3c7b84e93d 100644 --- a/resolving/npm-resolver/test/publishedBy.test.ts +++ b/resolving/npm-resolver/test/publishedBy.test.ts @@ -1,7 +1,7 @@ import fs from 'node:fs' import path from 'node:path' -import { FULL_FILTERED_META_DIR } from '@pnpm/constants' +import { ABBREVIATED_META_DIR, FULL_FILTERED_META_DIR } from '@pnpm/constants' import { createFetchFromRegistry } from '@pnpm/network.fetch' import { createNpmResolver } from '@pnpm/resolving.npm-resolver' import { fixtures } from '@pnpm/test-fixtures' @@ -20,6 +20,7 @@ const registries: Registries = { /* eslint-disable @typescript-eslint/no-explicit-any */ const badDatesMeta = loadJsonFileSync(f.find('bad-dates.json')) const isPositiveMeta = loadJsonFileSync(f.find('is-positive-full.json')) +const isPositiveAbbreviatedMeta = loadJsonFileSync(f.find('is-positive.json')) /* eslint-enable @typescript-eslint/no-explicit-any */ const fetch = createFetchFromRegistry({}) @@ -152,3 +153,80 @@ test('should skip time field validation for excluded packages', async () => { expect(resolveResult!.resolvedVia).toBe('npm-registry') expect(resolveResult!.manifest.version).toBe('3.1.0') }) + +test('use abbreviated metadata when modified date is older than publishedBy', async () => { + // is-positive abbreviated has modified: "2017-08-17T19:26:00.508Z" + // publishedBy is set to 2018, so modified < publishedBy → all versions are old enough + getMockAgent().get(registries.default.replace(/\/$/, '')) + .intercept({ path: '/is-positive', method: 'GET' }) + .reply(200, isPositiveAbbreviatedMeta) + + const cacheDir = temporaryDirectory() + const { resolveFromNpm } = createResolveFromNpm({ + storeDir: temporaryDirectory(), + cacheDir, + registries, + }) + const resolveResult = await resolveFromNpm({ alias: 'is-positive', bareSpecifier: '^3.0.0' }, { + publishedBy: new Date('2018-01-01T00:00:00.000Z'), + }) + + expect(resolveResult!.resolvedVia).toBe('npm-registry') + expect(resolveResult!.id).toBe('is-positive@3.1.0') +}) + +test('re-fetch full metadata when abbreviated modified date is recent', async () => { + // Abbreviated has modified in the future relative to publishedBy → needs full metadata + const recentAbbreviated = { + ...isPositiveAbbreviatedMeta, + modified: '2015-06-10T00:00:00.000Z', + } + + const agent = getMockAgent().get(registries.default.replace(/\/$/, '')) + // First request: abbreviated + agent.intercept({ path: '/is-positive', method: 'GET' }) + .reply(200, recentAbbreviated) + // Second request: full metadata (re-fetch) + agent.intercept({ path: '/is-positive', method: 'GET' }) + .reply(200, isPositiveMeta) + + const cacheDir = temporaryDirectory() + const { resolveFromNpm } = createResolveFromNpm({ + storeDir: temporaryDirectory(), + cacheDir, + registries, + }) + // publishedBy is 2015-06-05, modified is 2015-06-10 → modified >= publishedBy → needs full + const resolveResult = await resolveFromNpm({ alias: 'is-positive', bareSpecifier: '^1.0.0' }, { + publishedBy: new Date('2015-06-05T00:00:00.000Z'), + }) + + expect(resolveResult!.resolvedVia).toBe('npm-registry') + // 1.0.0 was published 2015-06-02, which is before publishedBy (2015-06-05) + expect(resolveResult!.id).toBe('is-positive@1.0.0') +}) + +test('use cached metadata based on file mtime when publishedBy is set', async () => { + const cacheDir = temporaryDirectory() + // Write abbreviated metadata to the abbreviated cache dir + const cacheDir2 = path.join(cacheDir, `${ABBREVIATED_META_DIR}/registry.npmjs.org`) + fs.mkdirSync(cacheDir2, { recursive: true }) + const cachePath = path.join(cacheDir2, 'is-positive.json') + fs.writeFileSync(cachePath, JSON.stringify(isPositiveAbbreviatedMeta), 'utf8') + + // No mock agent intercepts — the test verifies no network request is made. + // If a request were attempted, it would fail. + + const { resolveFromNpm } = createResolveFromNpm({ + storeDir: temporaryDirectory(), + cacheDir, + registries, + }) + // publishedBy in the past relative to file mtime (file was just written = now) + const resolveResult = await resolveFromNpm({ alias: 'is-positive', bareSpecifier: '^3.0.0' }, { + publishedBy: new Date('2020-01-01T00:00:00.000Z'), + }) + + expect(resolveResult!.resolvedVia).toBe('npm-registry') + expect(resolveResult!.id).toBe('is-positive@3.1.0') +}) diff --git a/store/connection-manager/src/createNewStoreController.ts b/store/connection-manager/src/createNewStoreController.ts index c75683cf8e..717f166ebb 100644 --- a/store/connection-manager/src/createNewStoreController.ts +++ b/store/connection-manager/src/createNewStoreController.ts @@ -62,7 +62,6 @@ export async function createNewStoreController ( const fullMetadata = opts.fetchFullMetadata ?? ( ( opts.resolutionMode === 'time-based' || - Boolean(opts.minimumReleaseAge) || opts.trustPolicy === 'no-downgrade' ) && !opts.registrySupportsTimeField )