perf: lazy load metadata and use covering index for headers

This commit is contained in:
Zoltan Kochan
2026-04-02 22:42:57 +02:00
parent 47074b03d1
commit d2d471d331
2 changed files with 42 additions and 11 deletions

View File

@@ -12,7 +12,7 @@ export interface MetadataHeaders {
}
export interface MetadataRow {
data: Uint8Array | string
data: string | Uint8Array
etag?: string
modified?: string
cachedAt?: number
@@ -31,6 +31,7 @@ interface PendingWrite {
interface DbState {
db: DatabaseSyncType
stmtGet: StatementSync
stmtGetHeaders: StatementSync
stmtSet: StatementSync
stmtDelete: StatementSync
stmtListNames: StatementSync
@@ -61,6 +62,7 @@ function getDbState (cacheDir: string): DbState {
db.exec('PRAGMA mmap_size=536870912')
db.exec('PRAGMA cache_size=-64000')
db.exec('PRAGMA temp_store=MEMORY')
db.exec('PRAGMA wal_autocheckpoint=10000')
db.exec(`
CREATE TABLE IF NOT EXISTS metadata (
name TEXT PRIMARY KEY,
@@ -68,19 +70,21 @@ function getDbState (cacheDir: string): DbState {
modified TEXT,
cached_at INTEGER,
is_full INTEGER NOT NULL DEFAULT 0,
data BLOB NOT NULL
data TEXT NOT NULL
)
`)
db.exec('CREATE INDEX IF NOT EXISTS idx_metadata_headers ON metadata (name, etag, modified)')
// Drop tables from previous schema versions
db.exec('DROP TABLE IF EXISTS metadata_index')
db.exec('DROP TABLE IF EXISTS metadata_blobs')
db.exec('DROP TABLE IF EXISTS metadata_manifests')
db.exec('DROP INDEX IF EXISTS idx_metadata_headers')
db.exec('DROP TABLE IF EXISTS lookup_keys')
state = {
db,
stmtGet: db.prepare('SELECT data, etag, modified, cached_at, is_full FROM metadata WHERE name = ?'),
stmtGetHeaders: db.prepare('SELECT etag, modified FROM metadata WHERE name = ?'),
stmtSet: db.prepare('INSERT OR REPLACE INTO metadata (name, etag, modified, cached_at, is_full, data) VALUES (?, ?, ?, ?, ?, ?)'),
stmtDelete: db.prepare('DELETE FROM metadata WHERE name = ?'),
stmtListNames: db.prepare('SELECT name FROM metadata'),
@@ -105,6 +109,21 @@ function flushState (state: DbState): void {
const updates = Array.from(state.pendingCachedAtUpdates.entries())
state.pendingCachedAtUpdates.clear()
if (writes.length + updates.length === 1) {
try {
if (writes.length === 1) {
const w = writes[0]
state.stmtSet.run(w.name, w.etag, w.modified, w.cachedAt, w.isFull ? 1 : 0, w.data)
} else {
const [name, cachedAt] = updates[0]
state.stmtUpdateCachedAt.run(cachedAt, name)
}
return
} catch (_err) {
// Fast path failed; fall through to the transactional path below instead of returning
}
}
try {
state.db.exec('BEGIN IMMEDIATE')
let committed = false
@@ -164,7 +183,7 @@ export class MetadataCache {
modified: pending.modified ?? undefined,
}
}
const row = this.state.stmtGet.get(name) as { etag: string | null, modified: string | null } | undefined
const row = this.state.stmtGetHeaders.get(name) as { etag: string | null, modified: string | null } | undefined
if (!row) return undefined
return {
etag: row.etag ?? undefined,
@@ -184,7 +203,7 @@ export class MetadataCache {
}
}
const row = this.state.stmtGet.get(name) as {
data: Uint8Array
data: string | Uint8Array
etag: string | null
modified: string | null
cached_at: number | null

View File

@@ -87,7 +87,9 @@ export async function pickPackage (
const cacheKey = fullMetadata ? `${spec.name}:full` : spec.name
let metaCachedInStore: PackageMeta | null | undefined = ctx.metaCache.get(cacheKey)
if (metaCachedInStore == null) {
// Load the cached metadata from the DB up front only when this is an offline, prefer-offline, or lowest-version pick
if (metaCachedInStore == null && (ctx.offline === true || ctx.preferOffline === true || opts.pickLowestVersion)) {
metaCachedInStore = loadMetaFromDb(ctx.metadataDb, dbName, fullMetadata)
if (metaCachedInStore != null) {
ctx.metaCache.set(cacheKey, metaCachedInStore)
@@ -134,19 +136,29 @@ export async function pickPackage (
}
try {
let etag = metaCachedInStore?.etag
let modified = metaCachedInStore?.modified ?? metaCachedInStore?.time?.modified
if (!etag || !modified) {
const headers = ctx.metadataDb.getHeaders(dbName)
etag = etag ?? headers?.etag
modified = modified ?? headers?.modified
}
let fetchResult = await ctx.fetch(spec.name, {
authHeaderValue: opts.authHeaderValue,
fullMetadata,
etag: metaCachedInStore?.etag,
modified: metaCachedInStore?.modified ?? metaCachedInStore?.time?.modified,
etag,
modified,
registry: opts.registry,
})
// 304 Not Modified — trust whatever is cached, the registry just validated it
if (fetchResult.notModified) {
if (metaCachedInStore != null) {
metaCachedInStore.cachedAt = Date.now()
return { meta: metaCachedInStore, pickedPackage: _pickPackageFromMeta(metaCachedInStore) }
const meta = metaCachedInStore ?? loadMetaFromDb(ctx.metadataDb, dbName, fullMetadata)
if (meta != null) {
meta.cachedAt = Date.now()
ctx.metaCache.set(cacheKey, meta)
return { meta, pickedPackage: _pickPackageFromMeta(meta) }
}
throw new PnpmError('CACHE_MISSING_AFTER_304',
`Metadata cache for ${spec.name} is unreadable after receiving 304 Not Modified`)