From 089484aca81f6624b2a7f636476aaa9219ded33f Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Sat, 6 Jun 2026 02:16:33 +0200 Subject: [PATCH] perf(pnpr): resolve server-side and fetch tarballs directly (#12232) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Reworks pnpr from an install/file accelerator into a resolve-only accelerator: - `POST /v1/resolve` resolves against the client-supplied registries and returns a gzipped JSON lockfile response - pacquet/pnpm clients then fetch tarballs normally from registries with their own credentials and existing parallel fetch/integrity paths - pnpr no longer serves package file bytes or store-index rows, so the server-side file diff, file-frame response, grant table, and public-package byte-gating code are removed The follow-up resolution fast paths are included on the new measured path: - repeated public no-lockfile resolves use a bounded in-memory TTL cache - fresh frozen input lockfiles skip the server-side lockfile-only pacquet resolve after verification proves the lockfile is usable - input lockfile verification and the verdict cache are preserved ## Benchmark Integrated benchmark on Linux shows small improvements in all pnpr rows, with the clearest movement in hot restore. This should be treated as an incremental win rather than a large install-speed change. | Scenario | `pnpr@HEAD` | `pnpr@main` | Change | | --- | ---: | ---: | ---: | | fresh restore, cold cache + cold store | `1.677 s ± 0.090` | `1.686 s ± 0.070` | ~0.6% faster | | fresh restore, hot cache + hot store | `492.5 ms ± 18.1` | `521.9 ms ± 33.4` | ~5.6% faster | | fresh install, cold cache + cold store | `1.997 s ± 0.025` | `2.003 s ± 0.038` | ~0.3% faster | | fresh install, hot cache + hot store | `1.211 s ± 0.024` | `1.236 s ± 0.038` | ~2.0% faster | ## Trade-off Going registry-direct means pnpr no longer gates tarball bytes itself. 
Private package access is enforced by the upstream registry when the client fetches tarballs. Resolution policy still runs server-side: lockfile verification, release-age policy, trust policy, and resolved package selection continue to happen before the client fetches bytes. --- .changeset/pnpr-resolve-only.md | 8 + Cargo.lock | 2 - .../deps-installer/src/install/index.ts | 141 +--- pacquet/crates/cli/src/cli_args/install.rs | 30 +- pacquet/crates/cli/tests/pnpr_install.rs | 3 +- pacquet/crates/network/src/auth.rs | 2 +- pacquet/crates/package-manager/src/install.rs | 2 +- pacquet/crates/pnpr-client/Cargo.toml | 4 +- pacquet/crates/pnpr-client/src/lib.rs | 329 +------- pacquet/crates/pnpr-client/src/tests.rs | 43 +- .../crates/pnpr-client/tests/integration.rs | 176 +--- .../integrated-benchmark/src/cli_args.rs | 2 +- .../integrated-benchmark/src/work_env.rs | 6 +- pnpm-lock.yaml | 12 - pnpm/test/install/pnpmRegistry.ts | 6 +- pnpr/client/README.md | 19 +- pnpr/client/package.json | 10 +- pnpr/client/src/index.ts | 2 +- pnpr/client/src/protocol.ts | 6 - ...npmRegistry.ts => resolveViaPnprServer.ts} | 129 +-- pnpr/client/tsconfig.json | 9 - pnpr/crates/pnpr/src/config.rs | 28 +- pnpr/crates/pnpr/src/install_accelerator.rs | 792 ------------------ .../pnpr/src/install_accelerator/diff.rs | 157 ---- .../src/install_accelerator/grant_table.rs | 108 --- .../install_accelerator/grant_table/tests.rs | 74 -- .../install_accelerator/public_packages.rs | 95 --- .../public_packages/tests.rs | 42 - .../pnpr/src/install_accelerator/tests.rs | 354 -------- pnpr/crates/pnpr/src/lib.rs | 2 +- pnpr/crates/pnpr/src/main.rs | 2 +- pnpr/crates/pnpr/src/resolver.rs | 469 +++++++++++ .../protocol.rs | 22 +- .../resolve.rs | 224 ++--- .../resolve/tests.rs | 0 pnpr/crates/pnpr/src/resolver/tests.rs | 62 ++ .../verdict_cache.rs | 2 +- .../verdict_cache/tests.rs | 0 pnpr/crates/pnpr/src/s3.rs | 2 +- pnpr/crates/pnpr/src/server.rs | 64 +- pnpr/npm/pnpr/README.md | 6 +- worker/src/index.ts | 
54 +- worker/src/start.ts | 76 -- worker/src/types.ts | 12 - 44 files changed, 843 insertions(+), 2745 deletions(-) create mode 100644 .changeset/pnpr-resolve-only.md rename pnpr/client/src/{fetchFromPnpmRegistry.ts => resolveViaPnprServer.ts} (57%) delete mode 100644 pnpr/crates/pnpr/src/install_accelerator.rs delete mode 100644 pnpr/crates/pnpr/src/install_accelerator/diff.rs delete mode 100644 pnpr/crates/pnpr/src/install_accelerator/grant_table.rs delete mode 100644 pnpr/crates/pnpr/src/install_accelerator/grant_table/tests.rs delete mode 100644 pnpr/crates/pnpr/src/install_accelerator/public_packages.rs delete mode 100644 pnpr/crates/pnpr/src/install_accelerator/public_packages/tests.rs delete mode 100644 pnpr/crates/pnpr/src/install_accelerator/tests.rs create mode 100644 pnpr/crates/pnpr/src/resolver.rs rename pnpr/crates/pnpr/src/{install_accelerator => resolver}/protocol.rs (88%) rename pnpr/crates/pnpr/src/{install_accelerator => resolver}/resolve.rs (60%) rename pnpr/crates/pnpr/src/{install_accelerator => resolver}/resolve/tests.rs (100%) create mode 100644 pnpr/crates/pnpr/src/resolver/tests.rs rename pnpr/crates/pnpr/src/{install_accelerator => resolver}/verdict_cache.rs (98%) rename pnpr/crates/pnpr/src/{install_accelerator => resolver}/verdict_cache/tests.rs (100%) diff --git a/.changeset/pnpr-resolve-only.md b/.changeset/pnpr-resolve-only.md new file mode 100644 index 0000000000..2678718148 --- /dev/null +++ b/.changeset/pnpr-resolve-only.md @@ -0,0 +1,8 @@ +--- +"@pnpm/pnpr.client": minor +"@pnpm/installing.deps-installer": minor +"@pnpm/worker": patch +"pnpm": minor +--- + +The pnpr install accelerator is now used only to create the lockfile. Previously `POST /v1/install` returned the resolved lockfile **and** all missing file contents inline over a single connection, which was bandwidth-bound on cold/WAN installs (one TCP stream can't compete with a registry's parallel CDN fetches). 
The accelerator is now a two-phase flow: the pnpr server resolves and verifies the lockfile server-side (collapsing resolution's round-trip depth), then the client fetches every tarball directly from the registries in parallel, exactly like a normal install. This makes the accelerated path never slower than a plain install, and turns pnpr into a stateless resolver that stores no tarballs and serves no file content [#12230](https://github.com/pnpm/pnpm/issues/12230). diff --git a/Cargo.lock b/Cargo.lock index 7acb8ee19f..325e39f3fa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3335,14 +3335,12 @@ dependencies = [ name = "pacquet-pnpr-client" version = "0.0.1" dependencies = [ - "base64 0.22.1", "derive_more", "flate2", "mockito", "pacquet-config", "pacquet-lockfile", "pacquet-lockfile-verification", - "pacquet-store-dir", "pacquet-testing-utils", "pnpr", "reqwest 0.13.3", diff --git a/installing/deps-installer/src/install/index.ts b/installing/deps-installer/src/install/index.ts index e8666b9cfe..9cbd21e76e 100644 --- a/installing/deps-installer/src/install/index.ts +++ b/installing/deps-installer/src/install/index.ts @@ -177,7 +177,7 @@ export async function install ( // When a pnpr server is configured, use server-side resolution // instead of the normal resolution flow. if (opts.pnprServer) { - return installFromPnpmRegistry(manifest, rootDir, opts) + return installViaPnprServer(manifest, rootDir, opts) } const { updatedCatalogs, updatedProjects: projects, ignoredBuilds, resolutionPolicyViolations } = await mutateModules( @@ -2287,10 +2287,10 @@ async function mutateModulesViaPnpr ( const pnprProjects = await preparePnprProjects(projects, opts) if (!pnprProjects) return null - // installFromPnpmRegistry runs the headless install for the first + // installViaPnprServer runs the headless install for the first // project's root and the workspace path for the rest. Pass the // pre-processed manifests so resolution sees the post-mutation state. 
- const result = await installFromPnpmRegistry( + const result = await installViaPnprServer( pnprProjects[0].manifest, pnprProjects[0].rootDir, opts, @@ -2323,11 +2323,11 @@ async function mutateModulesViaPnpr ( } /** - * When a pnpr server is configured, resolve dependencies server-side - * and download only the missing files. Then run a headless install to link - * packages into node_modules. + * When a pnpr server is configured, resolve dependencies server-side, + * then run a headless install that fetches tarballs from the registries + * and links packages into node_modules — like a normal install. */ -async function installFromPnpmRegistry ( +async function installViaPnprServer ( manifest: ProjectManifest, rootDir: ProjectRootDir, opts: Opts, @@ -2345,10 +2345,8 @@ async function installFromPnpmRegistry ( { hint: 'Unset `trustPolicy` for this install, or disable the pnpr server (unset `--pnpr-server` / `pnprServer` in pnpm-workspace.yaml) so resolution runs locally and the trust check applies.' } ) } - const { fetchFromPnpmRegistry } = await import('@pnpm/pnpr.client') + const { resolveViaPnprServer } = await import('@pnpm/pnpr.client') const { createGetAuthHeaderByURI, getAuthHeadersFromCreds } = await import('@pnpm/network.auth-header') - const { StoreIndex } = await import('@pnpm/store.index') - const { setImportConcurrency } = await import('@pnpm/worker') // Forward the whole credential map (the registries a graph touches // aren't known up front), so the server attaches the right token per @@ -2356,10 +2354,6 @@ async function installFromPnpmRegistry ( const configByUri = opts.configByUri ?? {} const forwardedAuthHeaders = getAuthHeadersFromCreds(configByUri) const pnprAuthorization = createGetAuthHeaderByURI(configByUri)(opts.pnprServer!) - // Raise import concurrency for this install only — the pnpr server path has no - // concurrent fetching competing for workers. Restore afterwards so we - // don't leak a process-wide mutation to other installs (e.g. 
tests). - const restoreImportConcurrency = setImportConcurrency(6) try { const lockfileDir = opts.lockfileDir ?? rootDir @@ -2373,51 +2367,33 @@ async function installFromPnpmRegistry ( logger.info({ message: 'Resolving dependencies via the pnpr server', prefix: rootDir }) - // Open the store index to read integrities and write new entries. - // Close it in a finally so a failure in fetchFromPnpmRegistry doesn't - // leak an open SQLite handle (on Windows that also blocks store cleanup). - const storeIndex = new StoreIndex(opts.storeDir) - let lockfile, pnprStats, fileDownloads, indexEntries - try { - // Build projects list for workspace support. - // Normalize separators to POSIX — on Windows `path.relative` returns - // backslashes, which the pnpr server rejects (it treats `\` as an - // unsafe/YAML-injection character and normalizes paths as POSIX). - const projectsList = allInstallProjects && allInstallProjects.length > 1 - ? allInstallProjects.map(p => ({ - dir: (path.relative(lockfileDir, p.rootDir) || '.').split(path.sep).join('/'), - dependencies: p.manifest.dependencies, - devDependencies: p.manifest.devDependencies, - optionalDependencies: p.manifest.optionalDependencies, - })) - : undefined - - ;({ lockfile, stats: pnprStats, fileDownloads, indexEntries } = await fetchFromPnpmRegistry({ - registryUrl: opts.pnprServer!, - storeDir: opts.storeDir, - storeIndex, - dependencies: projectsList ? undefined : manifest.dependencies, - devDependencies: projectsList ? undefined : manifest.devDependencies, - optionalDependencies: projectsList ? undefined : manifest.optionalDependencies, - projects: projectsList, - registry: opts.registries?.default, - namedRegistries: opts.namedRegistries, - authHeaders: forwardedAuthHeaders, - authorization: pnprAuthorization, - overrides: opts.overrides, - minimumReleaseAge: opts.minimumReleaseAge, - lockfile: existingLockfile ?? undefined, - lockfileOnly: opts.lockfileOnly, + // Build projects list for workspace support. 
+ // Normalize separators to POSIX — on Windows `path.relative` returns + // backslashes, which the pnpr server rejects (it treats `\` as an + // unsafe/YAML-injection character and normalizes paths as POSIX). + const projectsList = allInstallProjects && allInstallProjects.length > 1 + ? allInstallProjects.map(p => ({ + dir: (path.relative(lockfileDir, p.rootDir) || '.').split(path.sep).join('/'), + dependencies: p.manifest.dependencies, + devDependencies: p.manifest.devDependencies, + optionalDependencies: p.manifest.optionalDependencies, })) + : undefined - // Write store index entries so headless install finds them. - const { writeRawIndexEntries } = await import('@pnpm/pnpr.client') - writeRawIndexEntries(indexEntries, storeIndex) - - storeIndex.checkpoint() - } finally { - storeIndex.close() - } + const { lockfile, stats: pnprStats } = await resolveViaPnprServer({ + registryUrl: opts.pnprServer!, + dependencies: projectsList ? undefined : manifest.dependencies, + devDependencies: projectsList ? undefined : manifest.devDependencies, + optionalDependencies: projectsList ? undefined : manifest.optionalDependencies, + projects: projectsList, + registry: opts.registries?.default, + namedRegistries: opts.namedRegistries, + authHeaders: forwardedAuthHeaders, + authorization: pnprAuthorization, + overrides: opts.overrides, + minimumReleaseAge: opts.minimumReleaseAge, + lockfile: existingLockfile ?? undefined, + }) await writeWantedLockfileAndRecordVerified({ lockfileDir, @@ -2429,7 +2405,7 @@ async function installFromPnpmRegistry ( }) logger.info({ - message: `Resolved ${pnprStats.totalPackages} packages: ${pnprStats.alreadyInStore} cached, ${pnprStats.filesToDownload} files to download`, + message: `Resolved ${pnprStats.totalPackages} packages`, prefix: rootDir, }) @@ -2437,10 +2413,6 @@ async function installFromPnpmRegistry ( // pnpm fetches nothing and links nothing in this mode — stop before the // headless install. See https://github.com/pnpm/pnpm/issues/12146. 
if (opts.lockfileOnly) { - // Nothing is downloaded in this mode, but the lockfile arrives before - // the stream closes — observe `fileDownloads` so a stream error after - // the `L` frame doesn't surface as an unhandled rejection. - void fileDownloads.catch(() => {}) return { updatedCatalogs: undefined, updatedManifest: manifest, @@ -2451,48 +2423,12 @@ async function installFromPnpmRegistry ( } } - // Wrap fetchPackage to: - // 1. Wait for pnpr server file downloads before checking the store - // 2. Skip integrity verification — files just written from the pnpr server - // are guaranteed correct (server verified, no rehashing needed) - const { readPkgFromCafs } = await import('@pnpm/worker') - const { storeIndexKey: _storeIndexKey } = await import('@pnpm/store.index') - const wrappedStoreController = { - ...opts.storeController, - fetchPackage: async (fetchOpts: any) => { // eslint-disable-line @typescript-eslint/no-explicit-any - await fileDownloads - const resolution = fetchOpts.pkg.resolution - const integrity = resolution?.integrity - // Fall through to the regular store controller for git-hosted tarballs. - // Their cached entry lives under gitHostedStoreIndexKey (preserves the - // built/not-built dimension), not the integrity-keyed path the pnpr server - // uses for npm tarballs. See @pnpm/store.pkg-finder for the rationale. - if (integrity && !resolution?.gitHosted) { - const filesIndexFile = _storeIndexKey(integrity, fetchOpts.pkg.id) - const result = await readPkgFromCafs( - { storeDir: opts.storeDir, verifyStoreIntegrity: false }, - filesIndexFile, - { readManifest: true, expectedPkg: { name: fetchOpts.pkg.name, version: fetchOpts.pkg.version } } - ) - return { - fetching: () => Promise.resolve({ - files: result.files, - bundledManifest: result.bundledManifest, - integrity, - }), - filesIndexFile, - } - } - return opts.storeController.fetchPackage(fetchOpts) - }, - } - + // The pnpr server only resolves; it serves no file content. 
Fetch every + // tarball from the registries with the regular store controller, in + // parallel, exactly like a normal install. See + // https://github.com/pnpm/pnpm/issues/12230. const headlessOpts = { ...opts, - // Skip re-verifying files just written from the pnpr server — they're - // guaranteed correct (server verified, no rehashing needed). - verifyStoreIntegrity: false, - storeController: wrappedStoreController, dir: rootDir as string, lockfileDir, engineStrict: opts.engineStrict ?? false, @@ -2553,6 +2489,5 @@ async function installFromPnpmRegistry ( // normal install path does the same; skipping it here would leave // pending writes on disk and diverge from lifecycle expectations. await opts.storeController.close() - restoreImportConcurrency() } } diff --git a/pacquet/crates/cli/src/cli_args/install.rs b/pacquet/crates/cli/src/cli_args/install.rs index 3f8a31934e..b884d35476 100644 --- a/pacquet/crates/cli/src/cli_args/install.rs +++ b/pacquet/crates/cli/src/cli_args/install.rs @@ -5,7 +5,7 @@ use pacquet_config::NodeLinker; use pacquet_lockfile::Lockfile; use pacquet_package_manager::{Install, UpdateSeedPolicy}; use pacquet_package_manifest::DependencyGroup; -use pacquet_pnpr_client::{InstallOptions, PnprClient, PnprClientError}; +use pacquet_pnpr_client::{PnprClient, PnprClientError, ResolveOptions}; use pacquet_reporter::Reporter; /// `--node-linker` value parser. CLI mirror of @@ -393,18 +393,18 @@ struct PnprLink<'a> { node_linker: NodeLinker, skip_runtimes: bool, /// Governs the *server's* resolution behavior (frozen vs - /// reuse-and-update); forwarded to `/v1/install`. The local + /// reuse-and-update); forwarded to `/v1/resolve`. The local /// materialization always runs frozen against the server-produced /// lockfile. frozen_lockfile: bool, /// The *effective* `preferFrozenLockfile` (the CLI tri-state already /// resolved against `config.prefer_frozen_lockfile`, exactly as the - /// local `Install` resolves it); forwarded to `/v1/install`. 
`false` + /// local `Install` resolves it); forwarded to `/v1/resolve`. `false` /// forces the server to re-resolve. Resolving here — rather than /// sending the raw CLI override — keeps a yaml `preferFrozenLockfile: /// false` honored on the pnpr path without `--no-prefer-frozen-lockfile`. prefer_frozen_lockfile: bool, - /// `--lockfile-only`. Forwarded to `/v1/install` so the server + /// `--lockfile-only`. Forwarded to `/v1/resolve` so the server /// resolves only — returning the lockfile without fetching files — /// after which `install_via_pnpr` writes the lockfile and skips /// materialization, mirroring pnpm's resolve + write, fetch nothing, @@ -425,11 +425,13 @@ struct PnprLink<'a> { /// Resolve a single project through a `pnpr` server, then link it. /// /// Sends the client's registries to the server, which resolves against -/// them and streams back the missing files; writes the server-produced -/// lockfile, then runs a frozen install to materialize `node_modules` -/// from it — the equivalent of pnpm's `installFromPnpmRegistry` handing -/// off to `headlessInstall`. Under `--lockfile-only` it stops after -/// writing the lockfile (fetch nothing, link nothing). +/// them and returns the resolved lockfile; writes that lockfile, then +/// runs a frozen install to materialize `node_modules` from it — the +/// frozen install fetches every tarball from the registries itself, like +/// a normal install. This is the equivalent of pnpm's +/// `installViaPnprServer` handing off to `headlessInstall`. Under +/// `--lockfile-only` it stops after writing the lockfile (fetch nothing, +/// link nothing). 
async fn install_via_pnpr( state: &State, pnpr_server: &str, @@ -445,6 +447,11 @@ async fn install_via_pnpr( .dependencies([DependencyGroup::Dev]) .map(|(name, spec)| (name.to_string(), spec.to_string())) .collect(); + let optional_dependencies = state + .manifest + .dependencies([DependencyGroup::Optional]) + .map(|(name, spec)| (name.to_string(), spec.to_string())) + .collect(); let overrides = state .config @@ -463,10 +470,10 @@ async fn install_via_pnpr( // verifier) and freshly-resolved ones (the resolver's pick-time // gate, since the policy is wired into the server's config). let outcome = match PnprClient::new(pnpr_server) - .install(InstallOptions { - store_dir: &state.config.store_dir, + .resolve(ResolveOptions { dependencies, dev_dependencies, + optional_dependencies, registry: state.config.registry.clone(), named_registries: state.config.named_registries.clone(), // Forward the whole credential map: the registries a graph @@ -484,7 +491,6 @@ async fn install_via_pnpr( frozen_lockfile: link.frozen_lockfile, prefer_frozen_lockfile: Some(link.prefer_frozen_lockfile), ignore_manifest_check: link.ignore_manifest_check, - lockfile_only: link.lockfile_only, trust_lockfile: link.trust_lockfile, minimum_release_age: state.config.minimum_release_age, minimum_release_age_exclude: state.config.minimum_release_age_exclude.clone(), diff --git a/pacquet/crates/cli/tests/pnpr_install.rs b/pacquet/crates/cli/tests/pnpr_install.rs index cd3b126c62..8d20e8bd91 100644 --- a/pacquet/crates/cli/tests/pnpr_install.rs +++ b/pacquet/crates/cli/tests/pnpr_install.rs @@ -82,7 +82,8 @@ fn install_via_pnpr_links_node_modules() { let virtual_path = workspace.join("node_modules/.pnpm/@foo+no-deps@1.0.0"); assert!(virtual_path.exists(), "virtual store should hold the package"); assert!(workspace.join("pnpm-lock.yaml").exists(), "pnpr should write the lockfile"); - // The client store was populated by the server's `/v1/files` downloads. 
+ // The client store was populated by the frozen install fetching tarballs + // directly from the registry after pnpr returned the lockfile. assert!(store_dir.join("v11/index.db").exists(), "client store index should exist"); drop((root, mock_instance)); diff --git a/pacquet/crates/network/src/auth.rs b/pacquet/crates/network/src/auth.rs index 9029d1625e..d2665a5ea7 100644 --- a/pacquet/crates/network/src/auth.rs +++ b/pacquet/crates/network/src/auth.rs @@ -89,7 +89,7 @@ impl AuthHeaders { /// The `(nerf_darted_uri, header_value)` pairs backing this lookup, so /// a caller can forward the whole set to another process (the pnpr - /// accelerator) and rebuild it with [`Self::from_map`]. + /// resolver) and rebuild it with [`Self::from_map`]. pub fn entries(&self) -> impl Iterator { self.by_uri.iter().map(|(uri, value)| (uri.as_str(), value.as_str())) } diff --git a/pacquet/crates/package-manager/src/install.rs b/pacquet/crates/package-manager/src/install.rs index a7c0017690..ae9c7631e6 100644 --- a/pacquet/crates/package-manager/src/install.rs +++ b/pacquet/crates/package-manager/src/install.rs @@ -179,7 +179,7 @@ where pub update_seed_policy: UpdateSeedPolicy, /// Per-invocation `Authorization`-header override for resolve/verify; /// `None` (every local install) uses `config.auth_headers`. The pnpr - /// accelerator threads request-scoped [`AuthHeaders`] here so it + /// resolver threads request-scoped [`AuthHeaders`] here so it /// resolves a caller's private content without baking per-user auth /// into the shared `&'static Config`. 
pub auth_override: Option>, diff --git a/pacquet/crates/pnpr-client/Cargo.toml b/pacquet/crates/pnpr-client/Cargo.toml index 0b9e75c36c..c04da13b2c 100644 --- a/pacquet/crates/pnpr-client/Cargo.toml +++ b/pacquet/crates/pnpr-client/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pacquet-pnpr-client" -description = "Client for pnpr's server-accelerated installs" +description = "Client for pnpr's server-side resolver" version = "0.0.1" edition.workspace = true license.workspace = true @@ -14,8 +14,6 @@ path = "src/lib.rs" pacquet-config = { workspace = true } pacquet-lockfile = { workspace = true } pacquet-lockfile-verification = { workspace = true } -pacquet-store-dir = { workspace = true } -base64 = { workspace = true } derive_more = { workspace = true } flate2 = { workspace = true } reqwest = { workspace = true } diff --git a/pacquet/crates/pnpr-client/src/lib.rs b/pacquet/crates/pnpr-client/src/lib.rs index 6f9603b461..05745c4192 100644 --- a/pacquet/crates/pnpr-client/src/lib.rs +++ b/pacquet/crates/pnpr-client/src/lib.rs @@ -1,35 +1,22 @@ -//! Client for pnpr's server-accelerated installs. +//! Client for pnpr's server-side resolver. //! -//! Given a set of dependencies and the client's content-addressable -//! store, it: +//! Given a set of dependencies, it `POST`s them to `/v1/resolve`, where +//! the server resolves against the client's registries, verifies the +//! input lockfile under the client's policy, and answers with the +//! resolved lockfile as a gzipped JSON object. The caller then fetches +//! every tarball itself, in parallel, like a normal install +//! ([pnpm/pnpm#12230](https://github.com/pnpm/pnpm/issues/12230)). //! -//! 1. reads the integrities already in the local store index, -//! 2. `POST`s them with the dependencies to `/v1/install`, asking the -//! server to inline the file contents it's missing (`inlineFiles`), -//! 3. parses the single combined response — a length-prefixed JSON header -//! 
(lockfile, stats, store-index entries, or verification violations) -//! followed by the missing files' bytes, -//! 4. writes those bytes straight into the local CAFS *by digest* (no -//! re-hashing) and writes the forwarded store-index entries, and -//! 5. returns the resolved lockfile for a headless install. -//! -//! The whole exchange is one round trip — no handshake, no follow-up -//! `/v1/files` fetch. See -//! [pnpm/pnpm#12165](https://github.com/pnpm/pnpm/issues/12165). The -//! response is buffered rather than truly streamed, a tracked follow-up. +//! pnpr is a stateless resolver: it stores no tarballs and serves no file +//! content. -use std::{ - collections::{BTreeMap, HashSet}, - io::Read as _, -}; +use std::{collections::BTreeMap, io::Read as _}; -use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64}; use derive_more::{Display, Error, From}; use flate2::read::GzDecoder; use pacquet_config::TrustPolicy; use pacquet_lockfile::Lockfile; use pacquet_lockfile_verification::{RenderedViolation, VerifyError}; -use pacquet_store_dir::{StoreDir, StoreIndex, StoreIndexWriter, decode_package_files_index}; use reqwest::Client; use serde::Deserialize; @@ -44,24 +31,22 @@ pub struct PnprClient { } /// Inputs for a single-project resolution. -pub struct InstallOptions<'a> { - /// The client's content-addressable store. Resolved files and store - /// index entries are written here. - pub store_dir: &'a StoreDir, +pub struct ResolveOptions { pub dependencies: DepMap, pub dev_dependencies: DepMap, + pub optional_dependencies: DepMap, /// The client's default registry. The server resolves against this /// (and `named_registries`) rather than its own configuration. pub registry: String, /// The client's named-registry aliases. 
pub named_registries: DepMap, /// The caller's forwarded upstream credentials, keyed by nerf-darted - /// registry URI, so the server resolves/fetches private content as the + /// registry URI, so the server resolves private content as the /// caller. Distinct from [`Self::authorization`] (pnpr identity). pub auth_headers: DepMap, /// `Authorization` for the pnpr server's own URL (`None` if it needs - /// none): identifies the caller to pnpr's gate and keys the grant - /// table. Distinct from the upstream creds in [`Self::auth_headers`]. + /// none): identifies the caller to pnpr. Distinct from the upstream + /// creds in [`Self::auth_headers`]. pub authorization: Option, /// The client's `overrides` (selector -> spec) as raw JSON, applied /// at resolve time server-side. @@ -78,13 +63,6 @@ pub struct InstallOptions<'a> { /// `ignoreManifestCheck`: skip the manifest ↔ lockfile freshness /// comparison during the frozen resolve. pub ignore_manifest_check: bool, - /// `lockfileOnly`: ask the server to resolve only — return the - /// lockfile without fetching tarballs or computing the file diff, so - /// the response carries no missing files. The caller writes the - /// lockfile and skips materialization, mirroring pnpm's - /// `--lockfile-only`. See - /// [pnpm/pnpm#12146](https://github.com/pnpm/pnpm/issues/12146). - pub lockfile_only: bool, /// The client's effective `trustLockfile`. When `true` the server /// skips verifying the input lockfile (it still reuses it for /// resolution), mirroring the local `--trust-lockfile` opt-out. @@ -99,30 +77,20 @@ pub struct InstallOptions<'a> { pub trust_policy_ignore_after: Option, } -/// Result of [`PnprClient::install`]. +/// Result of [`PnprClient::resolve`]. #[must_use] -pub struct InstallOutcome { +pub struct ResolveOutcome { /// The resolved lockfile, ready for a headless install. pub lockfile: Lockfile, pub stats: Stats, - /// Number of inlined file entries written into the local CAFS. 
- pub files_written: usize, - /// Number of store-index entries written to the local index. - pub index_entries_written: usize, } -/// Resolution statistics from the response header. Field names mirror -/// the server's camelCase JSON. +/// Resolution statistics from the response. Field names mirror the +/// server's camelCase JSON. #[derive(Debug, Default, Deserialize)] #[serde(rename_all = "camelCase", default)] pub struct Stats { pub total_packages: u64, - pub already_in_store: u64, - pub packages_to_fetch: u64, - pub files_in_new_packages: u64, - pub files_already_in_cafs: u64, - pub files_to_download: u64, - pub download_bytes: u64, } #[derive(Debug, Display, Error, From)] @@ -196,29 +164,17 @@ impl PnprClient { Ok(()) } - /// Resolve a single project against the server and materialize the - /// missing files + store-index entries into the local store. - /// - /// One round trip: the request asks the server to inline the file - /// contents (`inlineFiles`), so the response carries the lockfile, - /// stats, store-index entries, and the missing files' bytes in a - /// single body — no handshake and no follow-up `/v1/files` fetch. - /// See [pnpm/pnpm#12165](https://github.com/pnpm/pnpm/issues/12165). - pub async fn install( - &self, - opts: InstallOptions<'_>, - ) -> Result { - let store_keys = read_store_keys(opts.store_dir); - let store_integrities = integrities_from_keys(&store_keys); - let present: HashSet<&str> = store_keys.iter().map(String::as_str).collect(); - + /// Resolve a single project against the server and return the + /// resolved lockfile. The server serves no file content — the caller + /// fetches every tarball itself. 
+ pub async fn resolve(&self, opts: ResolveOptions) -> Result { let request = serde_json::json!({ "projects": [{ "dir": ".", "dependencies": opts.dependencies, "devDependencies": opts.dev_dependencies, + "optionalDependencies": opts.optional_dependencies, }], - "storeIntegrities": store_integrities, "registry": opts.registry, "namedRegistries": opts.named_registries, "authHeaders": opts.auth_headers, @@ -227,7 +183,6 @@ impl PnprClient { "frozenLockfile": opts.frozen_lockfile, "preferFrozenLockfile": opts.prefer_frozen_lockfile, "ignoreManifestCheck": opts.ignore_manifest_check, - "lockfileOnly": opts.lockfile_only, "trustLockfile": opts.trust_lockfile, "minimumReleaseAge": opts.minimum_release_age, "minimumReleaseAgeExclude": opts.minimum_release_age_exclude, @@ -235,10 +190,9 @@ impl PnprClient { "trustPolicy": opts.trust_policy, "trustPolicyExclude": opts.trust_policy_exclude, "trustPolicyIgnoreAfter": opts.trust_policy_ignore_after, - "inlineFiles": true, }); - let mut post = self.http.post(format!("{}v1/install", self.base_url)).json(&request); + let mut post = self.http.post(format!("{}v1/resolve", self.base_url)).json(&request); if let Some(authorization) = opts.authorization.as_deref() { post = post.header("authorization", authorization); } @@ -246,25 +200,11 @@ impl PnprClient { if !response.status().is_success() { let status = response.status(); let body = response.text().await.unwrap_or_default(); - return Err(PnprClientError::Server(format!("/v1/install returned {status}: {body}"))); + return Err(PnprClientError::Server(format!("/v1/resolve returned {status}: {body}"))); } let raw = response.bytes().await?; - let parsed = parse_inline_response(&decompress(&raw)?)?; - - // The server inlines only the files the client is missing; a - // `--lockfile-only` resolve and a verification pass both carry an - // empty file payload, so this writes nothing in those cases. 
- let files_written = write_files_payload(opts.store_dir, &parsed.files_payload)?; - let index_entries_written = - write_index_entries(opts.store_dir, parsed.index_entries, &present).await; - - Ok(InstallOutcome { - lockfile: parsed.lockfile, - stats: parsed.stats, - files_written, - index_entries_written, - }) + parse_response(&decompress(&raw)?) } } @@ -283,62 +223,30 @@ fn decompress(raw: &[u8]) -> Result, PnprClientError> { } } -struct ParsedInstall { - lockfile: Lockfile, - stats: Stats, - /// The `/v1/files`-shaped binary frames the server inlined after the - /// header — written into the CAFS by [`write_files_payload`]. - files_payload: Vec, - index_entries: Vec<(String, Vec)>, -} - -/// Decode the combined `inlineFiles` install response: a 4-byte -/// big-endian header length, that many bytes of JSON header (lockfile, -/// stats, store-index entries, or verification violations), then the -/// file frames. -fn parse_inline_response(payload: &[u8]) -> Result { - if payload.len() < 4 { - return Err(PnprClientError::Protocol("install response too short".to_string())); - } - let header_len = u32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]) as usize; - let header_end = 4 + header_len; - if header_end > payload.len() { - return Err(PnprClientError::Protocol("install header truncated".to_string())); - } - let header: InlineHeader = serde_json::from_slice(&payload[4..header_end]) +/// Parse the install response: a JSON object carrying the resolved +/// lockfile and stats, or — when the server rejected the input lockfile +/// under the client's policy — the rendered verification violations. 
+fn parse_response(payload: &[u8]) -> Result { + let response: ResolveResponse = serde_json::from_slice(payload) .map_err(|err| PnprClientError::Protocol(err.to_string()))?; - if let Some(violations) = header.violations.filter(|list| !list.is_empty()) { + if let Some(violations) = response.violations.filter(|list| !list.is_empty()) { return Err(PnprClientError::Verification(build_verify_error(violations))); } - let lockfile = header + let lockfile = response .lockfile .ok_or_else(|| PnprClientError::Protocol("install response had no lockfile".to_string()))?; - let mut index_entries = Vec::with_capacity(header.index_entries.len()); - for entry in header.index_entries { - let raw = - BASE64.decode(&entry.b64).map_err(|err| PnprClientError::Protocol(err.to_string()))?; - index_entries.push((entry.key, raw)); - } - - Ok(ParsedInstall { - lockfile, - stats: header.stats, - files_payload: payload[header_end..].to_vec(), - index_entries, - }) + Ok(ResolveOutcome { lockfile, stats: response.stats }) } #[derive(Deserialize)] #[serde(rename_all = "camelCase")] -struct InlineHeader { +struct ResolveResponse { lockfile: Option, #[serde(default)] stats: Stats, - #[serde(default)] - index_entries: Vec, /// Present when the server rejected the input lockfile under the /// client's verification policy. Each entry mirrors the local /// runner's rendered violation so the client can rebuild the @@ -347,14 +255,6 @@ struct InlineHeader { violations: Option>, } -#[derive(Deserialize)] -struct InlineIndexEntry { - /// The store-index key, `{integrity}\t{pkgId}`. - key: String, - /// The base64-encoded msgpackr-records buffer. - b64: String, -} - #[derive(Deserialize)] struct WireViolation { name: String, @@ -398,162 +298,5 @@ fn intern_violation_code(code: &str) -> &'static str { } } -/// Decode the inlined binary file payload and write each entry to the -/// CAFS by digest. Returns the number of entries written. 
An empty -/// payload (no frames before the end-of-stream marker) writes nothing. -fn write_files_payload(store_dir: &StoreDir, payload: &[u8]) -> Result { - if payload.is_empty() { - return Ok(0); - } - if payload.len() < 4 { - return Err(PnprClientError::Protocol("files payload too short".to_string())); - } - let json_len = u32::from_be_bytes([payload[0], payload[1], payload[2], payload[3]]) as usize; - let mut offset = 4 + json_len; - let mut written = 0; - - loop { - if offset + 64 > payload.len() { - return Err(PnprClientError::Protocol("truncated files payload".to_string())); - } - let digest_bytes = &payload[offset..offset + 64]; - if digest_bytes.iter().all(|byte| *byte == 0) { - break; // end-of-stream marker - } - if offset + 69 > payload.len() { - return Err(PnprClientError::Protocol("truncated file header".to_string())); - } - let size = u32::from_be_bytes([ - payload[offset + 64], - payload[offset + 65], - payload[offset + 66], - payload[offset + 67], - ]) as usize; - let executable = payload[offset + 68] & 0x01 != 0; - let content_start = offset + 69; - let content_end = content_start + size; - if content_end > payload.len() { - return Err(PnprClientError::Protocol("truncated file content".to_string())); - } - let content = &payload[content_start..content_end]; - let digest = hex_encode(digest_bytes); - - write_cas_file(store_dir, &digest, executable, content)?; - written += 1; - offset = content_end; - } - - Ok(written) -} - -/// Write `content` to its content-addressed path. The digest is trusted -/// (the fast path skips re-hashing); a complete file already on disk is -/// left as-is, and a truncated one is replaced atomically — mirroring -/// the TypeScript `fetch-and-write-cafs` worker. 
-fn write_cas_file( - store_dir: &StoreDir, - digest: &str, - executable: bool, - content: &[u8], -) -> Result<(), PnprClientError> { - let mode = if executable { 0o755 } else { 0o644 }; - let path = store_dir - .cas_file_path_by_mode(digest, mode) - .ok_or_else(|| PnprClientError::Protocol(format!("invalid digest: {digest}")))?; - - if let Ok(metadata) = std::fs::metadata(&path) - && metadata.len() == content.len() as u64 - { - return Ok(()); // already present and complete - } - if let Some(parent) = path.parent() { - std::fs::create_dir_all(parent)?; - } - - let tmp = path.with_extension("tmp"); - std::fs::write(&tmp, content)?; - set_executable(&tmp, executable)?; - std::fs::rename(&tmp, &path)?; - Ok(()) -} - -#[cfg(unix)] -fn set_executable(path: &std::path::Path, executable: bool) -> std::io::Result<()> { - use std::os::unix::fs::PermissionsExt as _; - let mode = if executable { 0o755 } else { 0o644 }; - std::fs::set_permissions(path, std::fs::Permissions::from_mode(mode)) -} - -#[cfg(not(unix))] -fn set_executable(_path: &std::path::Path, _executable: bool) -> std::io::Result<()> { - Ok(()) -} - -/// Write the forwarded store-index entries, skipping keys already -/// present. Each entry's raw msgpackr-records buffer is decoded and -/// re-queued through the writer, whose blocking drain is awaited so the -/// rows are flushed before they're reported as written. 
-async fn write_index_entries( - store_dir: &StoreDir, - entries: Vec<(String, Vec)>, - present: &HashSet<&str>, -) -> usize { - let to_write: Vec<(String, Vec)> = - entries.into_iter().filter(|(key, _)| !present.contains(key.as_str())).collect(); - if to_write.is_empty() { - return 0; - } - - let (writer, writer_task) = StoreIndexWriter::spawn(store_dir); - let mut written = 0; - for (key, raw) in &to_write { - if let Ok(decoded) = decode_package_files_index(raw) { - writer.queue(key.clone(), decoded); - written += 1; - } - } - drop(writer); - let _ = writer_task.await; - written -} - -fn read_store_keys(store_dir: &StoreDir) -> Vec { - match StoreIndex::open_readonly_in(store_dir) { - Ok(index) => index.keys().unwrap_or_default(), - Err(_) => Vec::new(), - } -} - -/// The SRI integrities already in the store, derived from the -/// `{integrity}\t{pkgId}` index keys. Non-integrity keys (e.g. git URLs) -/// are filtered out — sending them would just bloat the request. -fn integrities_from_keys(keys: &[String]) -> Vec { - let mut seen = HashSet::new(); - let mut out = Vec::new(); - for key in keys { - let Some((integrity, _pkg_id)) = key.split_once('\t') else { continue }; - if !is_integrity_like(integrity) { - continue; - } - if seen.insert(integrity) { - out.push(integrity.to_string()); - } - } - out -} - -fn is_integrity_like(value: &str) -> bool { - value.starts_with("sha512-") || value.starts_with("sha256-") || value.starts_with("sha1-") -} - -fn hex_encode(bytes: &[u8]) -> String { - use std::fmt::Write as _; - let mut out = String::with_capacity(bytes.len() * 2); - for byte in bytes { - let _ = write!(out, "{byte:02x}"); - } - out -} - #[cfg(test)] mod tests; diff --git a/pacquet/crates/pnpr-client/src/tests.rs b/pacquet/crates/pnpr-client/src/tests.rs index 156806644c..ba6d8452d7 100644 --- a/pacquet/crates/pnpr-client/src/tests.rs +++ b/pacquet/crates/pnpr-client/src/tests.rs @@ -1,28 +1,12 @@ -use super::{PnprClientError, VerifyError, 
parse_inline_response}; +use super::{PnprClientError, VerifyError, parse_response}; -/// Frame a JSON header into a complete inline install payload with an -/// empty file section (the `{}` prefix plus the end-of-stream marker), -/// matching what the server sends when there are no files to inline. -fn inline_payload(header_json: &str) -> Vec { - let header = header_json.as_bytes(); - let mut payload = Vec::new(); - payload.extend_from_slice(&(header.len() as u32).to_be_bytes()); - payload.extend_from_slice(header); - payload.extend_from_slice(&2u32.to_be_bytes()); - payload.extend_from_slice(b"{}"); - payload.extend_from_slice(&[0u8; 64]); - payload -} - -/// A header carrying verification violations is rebuilt into the same -/// `VerifyError` the local gate raises, so the CLI aborts with an +/// A response header carrying verification violations is rebuilt into the +/// same `VerifyError` the local gate raises, so the CLI aborts with an /// identical diagnostic code + breakdown. #[test] -fn header_with_violations_rebuilds_a_verify_error() { - let payload = inline_payload( - r#"{"violations":[{"name":"@foo/no-deps","version":"1.0.0","code":"MINIMUM_RELEASE_AGE_VIOLATION","reason":"was published yesterday"}]}"#, - ); - let Err(PnprClientError::Verification(verify_err)) = parse_inline_response(&payload) else { +fn response_with_violations_rebuilds_a_verify_error() { + let payload = br#"{"violations":[{"name":"@foo/no-deps","version":"1.0.0","code":"MINIMUM_RELEASE_AGE_VIOLATION","reason":"was published yesterday"}]}"#; + let Err(PnprClientError::Verification(verify_err)) = parse_response(payload) else { panic!("expected a Verification error"); }; assert!( @@ -37,10 +21,8 @@ fn header_with_violations_rebuilds_a_verify_error() { /// variant. 
#[test] fn tarball_mismatch_maps_to_the_generic_envelope() { - let payload = inline_payload( - r#"{"violations":[{"name":"acme","version":"1.0.0","code":"TARBALL_URL_MISMATCH","reason":"url mismatch"}]}"#, - ); - let Err(PnprClientError::Verification(verify_err)) = parse_inline_response(&payload) else { + let payload = br#"{"violations":[{"name":"acme","version":"1.0.0","code":"TARBALL_URL_MISMATCH","reason":"url mismatch"}]}"#; + let Err(PnprClientError::Verification(verify_err)) = parse_response(payload) else { panic!("expected a Verification error"); }; assert!( @@ -49,12 +31,11 @@ fn tarball_mismatch_maps_to_the_generic_envelope() { ); } -/// A header with no lockfile and no violations is a malformed response, -/// not a silent success. +/// A response with no lockfile and no violations is malformed, not a +/// silent success. #[test] -fn header_without_a_lockfile_is_a_protocol_error() { - let payload = inline_payload("{}"); - let Err(PnprClientError::Protocol(_)) = parse_inline_response(&payload) else { +fn response_without_a_lockfile_is_a_protocol_error() { + let Err(PnprClientError::Protocol(_)) = parse_response(b"{}") else { panic!("expected a Protocol error"); }; } diff --git a/pacquet/crates/pnpr-client/tests/integration.rs b/pacquet/crates/pnpr-client/tests/integration.rs index a094af2ee2..3a612d1fa2 100644 --- a/pacquet/crates/pnpr-client/tests/integration.rs +++ b/pacquet/crates/pnpr-client/tests/integration.rs @@ -2,9 +2,11 @@ //! //! Topology: a shared [`TestRegistry`] serves the package fixtures; a //! per-test in-process `pnpr` hosts the `/-/pnpr` handshake + -//! `/v1/install` + `/v1/files` endpoints. The client sends the registry -//! it wants resolved from, so the pnpr server's *own* uplink is left at -//! the default — proving resolution uses the client-supplied registry. +//! `/v1/resolve` endpoints. The client sends the registry it wants +//! resolved from, so the pnpr server's *own* uplink is left at the +//! 
default — proving resolution uses the client-supplied registry. pnpr +//! serves no file content; the client receives only the resolved +//! lockfile. use std::{ collections::BTreeMap, @@ -12,8 +14,7 @@ use std::{ time::Duration, }; -use pacquet_pnpr_client::{InstallOptions, PnprClient, PnprClientError}; -use pacquet_store_dir::StoreDir; +use pacquet_pnpr_client::{PnprClient, PnprClientError, ResolveOptions}; use pacquet_testing_utils::registry::TestRegistry; use tempfile::TempDir; use tokio::net::TcpListener; @@ -75,15 +76,11 @@ async fn register_token(registry_url: &str, username: &str) -> String { json["token"].as_str().expect("token in adduser response").to_string() } -fn options<'a>( - store: &'a StoreDir, - registry: &str, - dependencies: BTreeMap, -) -> InstallOptions<'a> { - InstallOptions { - store_dir: store, +fn options(registry: &str, dependencies: BTreeMap) -> ResolveOptions { + ResolveOptions { dependencies, dev_dependencies: BTreeMap::new(), + optional_dependencies: BTreeMap::new(), registry: registry.to_string(), named_registries: BTreeMap::new(), auth_headers: BTreeMap::new(), @@ -93,7 +90,6 @@ fn options<'a>( frozen_lockfile: false, prefer_frozen_lockfile: None, ignore_manifest_check: false, - lockfile_only: false, trust_lockfile: false, minimum_release_age: None, minimum_release_age_exclude: None, @@ -106,16 +102,15 @@ fn options<'a>( /// The forwarded per-registry credentials and the pnpr-server identity /// header must travel on the wire: `authHeaders` in the body (so the -/// server resolves/fetches private content as the caller) and -/// `Authorization` on the request (so pnpr's gate + grant table key on -/// the right user). A `mockito` server captures the request and asserts -/// both are present; the canned 500 just short-circuits the client after -/// the match. +/// server resolves private content as the caller) and `Authorization` on +/// the request (so pnpr identifies the caller). 
A `mockito` server +/// captures the request and asserts both are present; the canned 500 just +/// short-circuits the client after the match. #[tokio::test] async fn forwards_credentials_and_the_identity_header() { let mut server = mockito::Server::new_async().await; let mock = server - .mock("POST", "/v1/install") + .mock("POST", "/v1/resolve") .match_header("authorization", "Bearer pnpr-token") .match_body(mockito::Matcher::PartialJsonString( r#"{"authHeaders":{"//npm.acme.test/":"Bearer upstream-token"}}"#.to_string(), @@ -125,46 +120,41 @@ async fn forwards_credentials_and_the_identity_header() { .create_async() .await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(format!("{}/", server.url())); - let mut opts = options(&store, "https://npm.acme.test/", deps([("@acme/foo", "1.0.0")])); + let mut opts = options("https://npm.acme.test/", deps([("@acme/foo", "1.0.0")])); opts.auth_headers = deps([("//npm.acme.test/", "Bearer upstream-token")]); opts.authorization = Some("Bearer pnpr-token".to_string()); - let result = client.install(opts).await; + let result = client.resolve(opts).await; assert!(result.is_err(), "the canned 500 should surface as an error"); mock.assert_async().await; } /// End-to-end: the test registry gates `@pnpm.e2e/needs-auth` behind -/// `$authenticated`, so resolving it through the accelerator only works +/// `$authenticated`, so resolving it through the resolver only works /// when the caller's upstream token is forwarded and the server fetches -/// the packument + tarball as the caller. +/// the packument as the caller. 
#[tokio::test] async fn a_forwarded_credential_resolves_a_private_package() { let registry = TestRegistry::start(); let token = register_token(&registry.url(), "needs-auth-forwarder").await; let (pnpr_url, _storage) = start_pnpr().await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(pnpr_url); - let mut opts = options(&store, &registry.url(), deps([("@pnpm.e2e/needs-auth", "1.0.0")])); + let mut opts = options(&registry.url(), deps([("@pnpm.e2e/needs-auth", "1.0.0")])); let mut auth = BTreeMap::new(); auth.insert(nerf_key(&registry.url()), format!("Bearer {token}")); opts.auth_headers = auth; - let outcome = client.install(opts).await.expect("forwarded credential should resolve it"); + let outcome = client.resolve(opts).await.expect("forwarded credential should resolve it"); let packages = outcome.lockfile.packages.as_ref().expect("lockfile has packages"); assert!( packages.keys().any(|key| key.to_string().starts_with("@pnpm.e2e/needs-auth@1.0.0")), "lockfile should contain the authed package, got: {:?}", packages.keys().map(ToString::to_string).collect::>(), ); - assert!(outcome.files_written >= 1, "its files should be materialized"); } /// The same install without a forwarded credential fails: the registry @@ -175,12 +165,10 @@ async fn a_private_package_fails_without_a_forwarded_credential() { let registry = TestRegistry::start(); let (pnpr_url, _storage) = start_pnpr().await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(pnpr_url); - let opts = options(&store, &registry.url(), deps([("@pnpm.e2e/needs-auth", "1.0.0")])); + let opts = options(&registry.url(), deps([("@pnpm.e2e/needs-auth", "1.0.0")])); - let Err(PnprClientError::Server(message)) = client.install(opts).await else { + let Err(PnprClientError::Server(message)) = client.resolve(opts).await else { panic!("expected the gated install to
fail with a server error"); }; assert!( @@ -190,16 +178,14 @@ async fn a_private_package_fails_without_a_forwarded_credential() { } #[tokio::test] -async fn resolves_and_downloads_a_package() { +async fn resolves_a_package() { let registry = TestRegistry::start(); let (pnpr_url, _storage) = start_pnpr().await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(pnpr_url); let outcome = client - .install(options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")]))) + .resolve(options(&registry.url(), deps([("@foo/no-deps", "1.0.0")]))) .await .expect("install should succeed"); @@ -211,101 +197,27 @@ async fn resolves_and_downloads_a_package() { ); assert!(outcome.stats.total_packages >= 1); - assert!(outcome.stats.packages_to_fetch >= 1, "first run should fetch the package"); - assert!(outcome.files_written >= 1, "at least package.json should be written"); - assert!(outcome.index_entries_written >= 1, "the package's index entry should be written"); - - let store_keys = pacquet_store_dir::StoreIndex::open_readonly_in(&store) - .expect("open client index") - .keys() - .expect("read keys"); - assert!( - store_keys.iter().any(|key| key.contains("@foo/no-deps@1.0.0")), - "client store index should hold the package, got: {store_keys:?}", - ); } +/// Optional dependencies must reach the server in the request, not be +/// silently dropped, so the resolved lockfile includes their edges. #[tokio::test] -async fn lockfile_only_resolves_without_fetching_files() { +async fn forwards_optional_dependencies() { let registry = TestRegistry::start(); let (pnpr_url, _storage) = start_pnpr().await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(pnpr_url); - // `--lockfile-only`: the server resolves and returns the lockfile but - // fetches nothing and serves no files, so the client store stays - // empty.
Mirrors pnpm's resolve + write, fetch nothing, link nothing. - let mut opts = options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")])); - opts.lockfile_only = true; - let outcome = client.install(opts).await.expect("lockfile-only install should succeed"); + let mut opts = options(&registry.url(), BTreeMap::new()); + opts.optional_dependencies = deps([("@foo/no-deps", "1.0.0")]); + let outcome = client.resolve(opts).await.expect("install should succeed"); let packages = outcome.lockfile.packages.as_ref().expect("lockfile has packages"); assert!( packages.keys().any(|key| key.to_string().starts_with("@foo/no-deps@1.0.0")), - "lockfile should still contain @foo/no-deps@1.0.0", + "the optional dependency should be resolved into the lockfile, got: {:?}", + packages.keys().map(ToString::to_string).collect::>(), ); - assert_eq!(outcome.files_written, 0, "lockfile-only should download no files"); - assert_eq!(outcome.index_entries_written, 0, "lockfile-only should write no index entries"); - assert!( - pacquet_store_dir::StoreIndex::open_readonly_in(&store) - .map(|index| index.keys().unwrap_or_default().is_empty()) - .unwrap_or(true), - "client store index should stay empty after a lockfile-only install", - ); -} - -#[tokio::test] -async fn warm_store_skips_already_present_files() { - let registry = TestRegistry::start(); - let (pnpr_url, _storage) = start_pnpr().await; - - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); - let client = PnprClient::new(pnpr_url); - - let cold = client - .install(options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")]))) - .await - .expect("cold install"); - assert!(cold.files_written >= 1); - - let warm = client - .install(options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")]))) - .await - .expect("warm install"); - - assert!(warm.stats.already_in_store >= 1, "package should be recognized as cached"); - assert_eq!(warm.files_written, 0, "warm run should
download no files"); - assert_eq!(warm.index_entries_written, 0, "warm run should write no index entries"); -} - -#[tokio::test] -async fn resolves_a_multi_file_package() { - let registry = TestRegistry::start(); - let (pnpr_url, _storage) = start_pnpr().await; - - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); - let client = PnprClient::new(pnpr_url); - - let outcome = client - .install(options( - &store, - &registry.url(), - deps([("@pnpm.e2e/hello-world-js-bin", "1.0.0")]), - )) - .await - .expect("install should succeed"); - - let packages = outcome.lockfile.packages.as_ref().expect("lockfile has packages"); - assert!( - packages - .keys() - .any(|key| key.to_string().starts_with("@pnpm.e2e/hello-world-js-bin@1.0.0")), - ); - assert!(outcome.files_written >= 2, "expected multiple files, got {}", outcome.files_written); } #[tokio::test] @@ -313,22 +225,20 @@ async fn verifies_and_accepts_a_clean_input_lockfile() { let registry = TestRegistry::start(); let (pnpr_url, _storage) = start_pnpr().await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(pnpr_url); // A first install with no lockfile produces a valid resolved one. let first = client - .install(options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")]))) + .resolve(options(&registry.url(), deps([("@foo/no-deps", "1.0.0")]))) .await .expect("first install"); // Sending it back as the input lockfile makes the server verify it // under the (default, policy-free) client policy before resolving; // a clean lockfile passes and the install succeeds.
- let mut opts = options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")])); + let mut opts = options(&registry.url(), deps([("@foo/no-deps", "1.0.0")])); opts.lockfile = Some(first.lockfile.clone()); - let second = client.install(opts).await.expect("verified-input install should succeed"); + let second = client.resolve(opts).await.expect("verified-input install should succeed"); assert!(second.lockfile.packages.is_some(), "resolution still produced a lockfile"); } @@ -337,24 +247,22 @@ async fn rejects_an_input_lockfile_that_violates_the_clients_policy() { let registry = TestRegistry::start(); let (pnpr_url, _storage) = start_pnpr().await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(pnpr_url); let first = client - .install(options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")]))) + .resolve(options(&registry.url(), deps([("@foo/no-deps", "1.0.0")]))) .await .expect("first install"); // Re-send the same lockfile under a ~100-year minimumReleaseAge: no // real publish time can satisfy it, so the server rejects the input // lockfile and the client rebuilds the identical `VerifyError`.
- let mut opts = options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")])); + let mut opts = options(&registry.url(), deps([("@foo/no-deps", "1.0.0")])); opts.lockfile = Some(first.lockfile.clone()); opts.minimum_release_age = Some(60 * 24 * 365 * 100); opts.minimum_release_age_ignore_missing_time = false; - let Err(PnprClientError::Verification(verify_err)) = client.install(opts).await else { + let Err(PnprClientError::Verification(verify_err)) = client.resolve(opts).await else { panic!("expected a verification error rejecting the input lockfile"); }; assert!( @@ -368,12 +276,10 @@ async fn trust_lockfile_makes_the_server_skip_verification() { let registry = TestRegistry::start(); let (pnpr_url, _storage) = start_pnpr().await; - let client_store = TempDir::new().unwrap(); - let store = StoreDir::new(client_store.path().to_path_buf()); let client = PnprClient::new(pnpr_url); let first = client - .install(options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")]))) + .resolve(options(&registry.url(), deps([("@foo/no-deps", "1.0.0")]))) .await .expect("first install"); @@ -381,13 +287,13 @@ async fn trust_lockfile_makes_the_server_skip_verification() { // trips on, but with the client's `trustLockfile` opt-out set: the // server must skip the verify gate and resolve normally, matching the // local `--trust-lockfile` path.
- let mut opts = options(&store, &registry.url(), deps([("@foo/no-deps", "1.0.0")])); + let mut opts = options(&registry.url(), deps([("@foo/no-deps", "1.0.0")])); opts.lockfile = Some(first.lockfile.clone()); opts.minimum_release_age = Some(60 * 24 * 365 * 100); opts.minimum_release_age_ignore_missing_time = false; opts.trust_lockfile = true; - let outcome = client.install(opts).await.expect("trustLockfile should skip verification"); + let outcome = client.resolve(opts).await.expect("trustLockfile should skip verification"); assert!(outcome.lockfile.packages.is_some(), "install still resolved a lockfile"); } diff --git a/pacquet/tasks/integrated-benchmark/src/cli_args.rs b/pacquet/tasks/integrated-benchmark/src/cli_args.rs index 20aa05ba0b..195a43e02a 100644 --- a/pacquet/tasks/integrated-benchmark/src/cli_args.rs +++ b/pacquet/tasks/integrated-benchmark/src/cli_args.rs @@ -90,7 +90,7 @@ pub struct TargetSpec { pub enum TargetKind { Pacquet, Pnpm, - /// A pacquet client driven through a pnpr install-accelerator server. + /// A pacquet client driven through a pnpr resolver server. /// Builds both the `pacquet` and `pnpr` binaries from the revision's /// monorepo clone, boots a per-target pnpr server with an isolated /// store, and points the client at it via `PNPR_SERVER`. diff --git a/pacquet/tasks/integrated-benchmark/src/work_env.rs b/pacquet/tasks/integrated-benchmark/src/work_env.rs index 882ddbbdb0..f8c8b920d6 100644 --- a/pacquet/tasks/integrated-benchmark/src/work_env.rs +++ b/pacquet/tasks/integrated-benchmark/src/work_env.rs @@ -454,7 +454,7 @@ impl WorkEnv { executor("hyperfine")(&mut command); } - /// Start a pnpr install-accelerator server for every `pnpr@` + /// Start a pnpr resolver server for every `pnpr@` /// target and write the `.pnpr-env` its `install.bash` sources. Each /// server gets an isolated `/pnpr-storage`.
The returned /// guards keep the servers alive and kill them on drop; the vec is @@ -485,7 +485,7 @@ impl WorkEnv { .arg(format!("127.0.0.1:{port}")) .arg("--storage") .arg(bench_dir.join("pnpr-storage")) - // The accelerator resolves against the registry the client + // The resolver resolves against the registry the client // sends, caching packuments in its own store. A long TTL keeps // those cached packuments authoritative across the run, the // same value the registry-mock pins for the same reason. @@ -581,7 +581,7 @@ impl WorkEnv { } } -/// A pnpr install-accelerator server spawned for one `pnpr@` +/// A pnpr resolver server spawned for one `pnpr@` /// target. Killed on drop so it never outlives the benchmark run. struct PnprServer { process: Child, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f23001da14..fcac1b58a9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8067,18 +8067,6 @@ importers: '@pnpm/lockfile.types': specifier: workspace:* version: link:../../lockfile/types - '@pnpm/logger': - specifier: 'catalog:' - version: 1001.0.1 - '@pnpm/store.cafs': - specifier: workspace:* - version: link:../../store/cafs - '@pnpm/store.index': - specifier: workspace:* - version: link:../../store/index - '@pnpm/worker': - specifier: workspace:^ - version: link:../../worker devDependencies: '@pnpm/pnpr.client': specifier: workspace:* diff --git a/pnpm/test/install/pnpmRegistry.ts b/pnpm/test/install/pnpmRegistry.ts index 0a0393f271..6882439a8b 100644 --- a/pnpm/test/install/pnpmRegistry.ts +++ b/pnpm/test/install/pnpmRegistry.ts @@ -11,7 +11,7 @@ import { writeYamlFileSync } from 'write-yaml-file' import { execPnpm } from '../utils/index.js' // The pnpr server started by the test harness (see the with-registry jest -// preset) serves the install-accelerator endpoint (/v1/install) on the +// preset) serves the resolver endpoint (/v1/resolve) on the // registry-mock port, so it doubles as the pnpr server under test. 
const PNPR = `http://localhost:${REGISTRY_MOCK_PORT}` @@ -20,7 +20,7 @@ let serverPort: number let requestCount: number beforeAll(async () => { - // Counting proxy — forwards to the pnpr server and counts /v1/install + // Counting proxy — forwards to the pnpr server and counts /v1/resolve // requests so we can assert that the pnpr server path was actually taken. requestCount = 0 server = http.createServer((req, res) => { @@ -28,7 +28,7 @@ beforeAll(async () => { res.writeHead(400).end() return } - if (req.url === '/v1/install') { + if (req.url === '/v1/resolve') { requestCount++ } const proxyReq = http.request(`${PNPR}${req.url}`, { diff --git a/pnpr/client/README.md b/pnpr/client/README.md index ef75664188..1e9a10b549 100644 --- a/pnpr/client/README.md +++ b/pnpr/client/README.md @@ -1,15 +1,14 @@ # @pnpm/pnpr.client -Client library for the pnpr server. Reads the local store state, sends it to the server, and writes the received files into the content-addressable store. +Client library for the pnpr server. Resolves a project's dependencies server-side and returns the resolved lockfile. ## How it works -1. Reads integrity hashes from the local store index (`index.db`). -2. Sends `POST /v1/install` to the pnpr server with the project's dependencies and the store integrities. -3. Parses the NDJSON streaming response — `D`-lines (missing file digests) are dispatched to worker downloads against `/v1/files`, `I`-lines are buffered as raw store-index entries, and the final `L`-line yields the resolved lockfile and stats. -4. File download workers write each received file directly to the local CAFS (`files/{hash[:2]}/{hash[2:]}`). -5. Writes store index entries for all new packages in a single SQLite transaction. -6. Returns the resolved lockfile for use with pnpm's headless install (linking phase). +1. Sends `POST /v1/resolve` to the pnpr server with the project's dependencies (and the existing lockfile, if any, for incremental resolution). +2.
The server resolves against the client's registries, verifies the input lockfile under the client's policy, and answers with one gzipped JSON object carrying the resolved lockfile and stats. +3. Returns the resolved lockfile for use with pnpm's headless install, which fetches every tarball directly from the registries in parallel — like a normal install. See [pnpm/pnpm#12230](https://github.com/pnpm/pnpm/issues/12230). + +pnpr is a stateless resolver: it stores no tarballs and serves no file content. ## Usage @@ -17,20 +16,14 @@ This package is used internally by pnpm when the `pnprServer` config option is s ```typescript import { fetchFromPnpmRegistry } from '@pnpm/pnpr.client' -import { StoreIndex } from '@pnpm/store.index' - -const storeIndex = new StoreIndex('/path/to/store') const { lockfile, stats } = await fetchFromPnpmRegistry({ registryUrl: 'http://localhost:4000', - storeDir: '/path/to/store', - storeIndex, dependencies: { react: '^19.0.0' }, devDependencies: { typescript: '^5.0.0' }, }) console.log(`Resolved ${stats.totalPackages} packages`) -console.log(`${stats.alreadyInStore} cached, ${stats.filesToDownload} files downloaded`) // lockfile is ready for headless install ``` diff --git a/pnpr/client/package.json b/pnpr/client/package.json index 8dcf0504b3..b8386d9c4a 100644 --- a/pnpr/client/package.json +++ b/pnpr/client/package.json @@ -1,7 +1,7 @@ { "name": "@pnpm/pnpr.client", "version": "1.1.0", - "description": "Client for the pnpr server — sends store state, receives resolved lockfile and missing files", + "description": "Client for the pnpr server — resolves a project server-side and receives the resolved lockfile", "keywords": [ "pnpm", "pnpm11" @@ -32,13 +32,7 @@ }, "dependencies": { "@pnpm/lockfile.fs": "workspace:*", - "@pnpm/lockfile.types": "workspace:*", - "@pnpm/store.cafs": "workspace:*", - "@pnpm/store.index": "workspace:*" - }, - "peerDependencies": { - "@pnpm/logger": "catalog:", - "@pnpm/worker": "workspace:^" + 
"@pnpm/lockfile.types": "workspace:*" }, "devDependencies": { "@pnpm/pnpr.client": "workspace:*", diff --git a/pnpr/client/src/index.ts b/pnpr/client/src/index.ts index 249f6d1c21..d23395ba9a 100644 --- a/pnpr/client/src/index.ts +++ b/pnpr/client/src/index.ts @@ -1,2 +1,2 @@ -export { fetchFromPnpmRegistry, type FetchFromPnpmRegistryOptions, type FetchFromPnpmRegistryResult, writeRawIndexEntries } from './fetchFromPnpmRegistry.js' export { type ResponseMetadata } from './protocol.js' +export { resolveViaPnprServer, type ResolveViaPnprServerOptions, type ResolveViaPnprServerResult } from './resolveViaPnprServer.js' diff --git a/pnpr/client/src/protocol.ts b/pnpr/client/src/protocol.ts index 9dc7b27a62..ce13e05acb 100644 --- a/pnpr/client/src/protocol.ts +++ b/pnpr/client/src/protocol.ts @@ -4,11 +4,5 @@ export interface ResponseMetadata { lockfile: LockfileObject stats: { totalPackages: number - alreadyInStore: number - packagesToFetch: number - filesInNewPackages: number - filesAlreadyInCafs: number - filesToDownload: number - downloadBytes: number } } diff --git a/pnpr/client/src/fetchFromPnpmRegistry.ts b/pnpr/client/src/resolveViaPnprServer.ts similarity index 57% rename from pnpr/client/src/fetchFromPnpmRegistry.ts rename to pnpr/client/src/resolveViaPnprServer.ts index 9bd46f7df7..8605ddca6c 100644 --- a/pnpr/client/src/fetchFromPnpmRegistry.ts +++ b/pnpr/client/src/resolveViaPnprServer.ts @@ -5,8 +5,6 @@ import { gunzip } from 'node:zlib' import { convertToLockfileObject } from '@pnpm/lockfile.fs' import type { LockfileFile, LockfileObject } from '@pnpm/lockfile.types' -import { StoreIndex } from '@pnpm/store.index' -import { writeCafsFiles } from '@pnpm/worker' import type { ResponseMetadata } from './protocol.js' @@ -18,13 +16,9 @@ export interface PnprProject { optionalDependencies?: Record } -export interface FetchFromPnpmRegistryOptions { +export interface ResolveViaPnprServerOptions { /** URL of the pnpr server */ registryUrl: string - /** Client's 
store directory */ - storeDir: string - /** Client's store index */ - storeIndex: StoreIndex /** Dependencies to resolve (single project) */ dependencies?: Record /** Dev dependencies to resolve (single project) */ @@ -42,13 +36,13 @@ export interface FetchFromPnpmRegistryOptions { namedRegistries?: Record /** * The caller's forwarded upstream credentials, keyed by nerf-darted - * registry URI, so the server resolves/fetches private content as the + * registry URI, so the server resolves private content as the * caller. Distinct from `authorization` (pnpr identity). */ authHeaders?: Record /** * `Authorization` for the pnpr server's own URL (`undefined` if none): - * identifies the caller to pnpr's gate and keys the grant table. + * identifies the caller to pnpr's gate. */ authorization?: string /** Overrides */ @@ -63,46 +57,32 @@ export interface FetchFromPnpmRegistryOptions { * `readWantedLockfileFile` so no in-memory→on-disk round-trip is needed. */ lockfile?: LockfileFile - /** - * `--lockfile-only`: resolve and return only the lockfile — fetch no - * files into the local store. Forwarded to the server (which skips the - * file diff); the client ignores the (empty) file payload so the store - * stays untouched. Mirrors pnpm's resolve + write, fetch nothing, link - * nothing. See https://github.com/pnpm/pnpm/issues/12146. 
- */ - lockfileOnly?: boolean } -export interface FetchFromPnpmRegistryResult { +export interface ResolveViaPnprServerResult { lockfile: LockfileObject stats: ResponseMetadata['stats'] - /** Promise that resolves when all file downloads are written to CAFS */ - fileDownloads: Promise - /** Pre-packed store index entries to write to SQLite */ - indexEntries: Array<{ key: string, buffer: Uint8Array }> } -interface InstallResponseHeader { +interface ResolveResponse { lockfile: LockfileFile stats: ResponseMetadata['stats'] - indexEntries?: Array<{ key: string, b64: string }> violations?: Array<{ name: string, version: string, code: string, reason: string }> } /** - * Fetch resolved dependencies from a pnpr server in a single round trip. + * Resolve a project against a pnpr server and return the resolved + * lockfile. * - * `POST /v1/install` (with `inlineFiles`) answers with one gzipped binary - * body: a length-prefixed JSON header (lockfile, stats, store-index - * entries, or verification violations) followed by the missing files' - * contents as binary frames. We parse the header here and hand the file - * frames to a worker that writes them straight into the CAFS. + * `POST /v1/resolve` answers with one gzipped JSON object carrying the + * server-resolved, server-verified lockfile (and stats). pnpr serves no + * file content — the caller fetches every tarball itself, in parallel, + * like a normal install + * ([pnpm/pnpm#12230](https://github.com/pnpm/pnpm/issues/12230)). */ -export async function fetchFromPnpmRegistry ( - opts: FetchFromPnpmRegistryOptions -): Promise { - const storeIntegrities = readStoreIntegrities(opts.storeIndex) - +export async function resolveViaPnprServer ( + opts: ResolveViaPnprServerOptions +): Promise { const projects = opts.projects ?? 
[{ dir: '.', dependencies: opts.dependencies, @@ -124,85 +104,38 @@ export async function fetchFromPnpmRegistry ( // protocol carries (split `packages`/`snapshots`, `{ specifier, version }` // importer deps). lockfile: opts.lockfile, - lockfileOnly: opts.lockfileOnly, - storeIntegrities, - inlineFiles: true, }) - const body = await postInstall(opts.registryUrl, requestBody, opts.authorization) + const body = await postResolve(opts.registryUrl, requestBody, opts.authorization) + const response = JSON.parse(body.toString('utf-8')) as ResolveResponse - // The combined response is `[u32 header length][header JSON][file frames]`. - if (body.length < 4) { - throw new Error('pnpr server returned a truncated /v1/install response') - } - const headerLength = body.readUInt32BE(0) - const header = JSON.parse(body.subarray(4, 4 + headerLength).toString('utf-8')) as InstallResponseHeader - - if (header.violations != null && header.violations.length > 0) { - const rendered = header.violations + if (response.violations != null && response.violations.length > 0) { + const rendered = response.violations .map((violation) => ` ${violation.name}@${violation.version}: ${violation.reason}`) .join('\n') throw new Error(`pnpr server rejected the lockfile under the verification policy:\n${rendered}`) } - const indexEntries = (header.indexEntries ?? []).map(({ key, b64 }) => ({ - key, - buffer: new Uint8Array(Buffer.from(b64, 'base64')), - })) - - // `--lockfile-only` fetches nothing: there are no file frames to write - // (the server sends only the end-of-stream marker), so leave the store - // untouched. - const fileDownloads = opts.lockfileOnly - ? Promise.resolve() - : writeCafsFiles({ - storeDir: opts.storeDir, - payload: body.subarray(4 + headerLength), - }).then(() => {}) - return { // The server speaks the on-disk lockfile format; convert it to the // in-memory `LockfileObject` the rest of pnpm consumes. 
- lockfile: convertToLockfileObject(header.lockfile), - stats: header.stats, - fileDownloads, - indexEntries, + lockfile: convertToLockfileObject(response.lockfile), + stats: response.stats, } } -function readStoreIntegrities (storeIndex: StoreIndex): string[] { - const seen = new Set() - for (const key of storeIndex.keys()) { - const tabIdx = key.indexOf('\t') - if (tabIdx === -1) continue - const integrity = key.slice(0, tabIdx) - // StoreIndex also stores non-integrity keys (e.g. git-hosted entries - // keyed by URL). Filter to actual SRI hashes — sending those over to - // the pnpr server would just bloat the request without ever matching. - if (!isIntegrityLike(integrity)) continue - seen.add(integrity) - } - return [...seen] -} - -function isIntegrityLike (value: string): boolean { - return value.startsWith('sha512-') || - value.startsWith('sha256-') || - value.startsWith('sha1-') -} - const REQUEST_TIMEOUT = 600_000 // 10 minutes — server-side resolution can be slow on first run /** - * `POST /v1/install` and return the full response body, decompressed. + * `POST /v1/resolve` and return the full response body, decompressed. * * `urlPath` resolution normalizes the base to end with "/" so a path * prefix configured on the pnpr server URL (e.g. https://host/pnpr/) is * preserved. */ -async function postInstall (registryUrl: string, body: string, authorization?: string): Promise { +async function postResolve (registryUrl: string, body: string, authorization?: string): Promise { const base = registryUrl.endsWith('/') ? registryUrl : `${registryUrl}/` - const url = new URL('v1/install', base) + const url = new URL('v1/resolve', base) const requestFn = url.protocol === 'https:' ? 
https.request : http.request const headers: http.OutgoingHttpHeaders = { @@ -210,8 +143,8 @@ async function postInstall (registryUrl: string, body: string, authorization?: s 'Content-Length': Buffer.byteLength(body), 'Accept-Encoding': 'gzip', } - // Identify the caller to the pnpr server's access gate so protected - // packages resolve and the per-user grant table keys on the right user. + // Identify the caller to the pnpr server so private packages resolve + // with the right credentials. if (authorization != null) { headers.Authorization = authorization } @@ -264,13 +197,3 @@ async function postInstall (registryUrl: string, body: string, authorization?: s req.end() }) } - -export function writeRawIndexEntries ( - indexEntries: Array<{ key: string, buffer: Uint8Array }>, - storeIndex: StoreIndex -): void { - const writes = indexEntries.filter(({ key }) => !storeIndex.has(key)) - if (writes.length > 0) { - storeIndex.setRawMany(writes) - } -} diff --git a/pnpr/client/tsconfig.json b/pnpr/client/tsconfig.json index 5c76c6378e..00762dd272 100644 --- a/pnpr/client/tsconfig.json +++ b/pnpr/client/tsconfig.json @@ -17,15 +17,6 @@ }, { "path": "../../lockfile/types" - }, - { - "path": "../../store/cafs" - }, - { - "path": "../../store/index" - }, - { - "path": "../../worker" } ] } diff --git a/pnpr/crates/pnpr/src/config.rs b/pnpr/crates/pnpr/src/config.rs index 60a8a1fe5a..483a0da605 100644 --- a/pnpr/crates/pnpr/src/config.rs +++ b/pnpr/crates/pnpr/src/config.rs @@ -64,8 +64,8 @@ pub struct Config { /// keep it on a durable volume. pub storage: PathBuf, /// Directory under which the disposable proxy cache lives — - /// the mirror of upstream registries plus the install-accelerator - /// store. Safe to wipe at any time; it self-heals on the next + /// the mirror of upstream registries plus the resolver's cache. + /// Safe to wipe at any time; it self-heals on the next /// request. 
Defaults to a `.pnpr-cache` subdirectory of /// [`Self::storage`]; set the YAML `cache:` key (or `--cache`) to /// an absolute path to put it on separate, ephemeral disk. @@ -99,11 +99,6 @@ pub struct Config { /// installs at startup. Sourced from the YAML `log:` object /// (Verdaccio 6+ shape). Defaults to pretty/info. pub logs: LogConfig, - /// How long the install accelerator keeps a per-user access grant - /// before re-verifying. `None` (default) is permanent (revocation - /// relies on clear-on-discovery). YAML `installAccelerator.grantTtl` - /// (seconds). - pub install_accelerator_grant_ttl: Option, /// Where the authoritative (hosted) store lives. Defaults to /// [`HostedStoreConfig::Fs`] — the local [`Self::storage`] /// directory. The YAML `s3:` block switches it to an S3-compatible @@ -548,19 +543,6 @@ struct ConfigFile { /// intentionally not accepted. #[serde(default)] log: Option, - /// pnpr-only block tuning the install accelerator. Absent on a - /// stock verdaccio config (silently ignored there, like the other - /// keys verdaccio doesn't share). - #[serde(default, rename = "installAccelerator")] - install_accelerator: Option, -} - -/// The YAML `installAccelerator:` block. -#[derive(Debug, Default, Deserialize)] -struct InstallAcceleratorFile { - /// `grantTtl` in seconds. Absent ⇒ permanent grants. - #[serde(default, rename = "grantTtl")] - grant_ttl: Option, } /// The YAML `log:` object. Mirrors verdaccio 6's logger config. 
@@ -643,7 +625,6 @@ impl Config { policies: PackagePolicies::registry_mock_defaults(), auth: AuthConfig::default(), logs: LogConfig::default(), - install_accelerator_grant_ttl: None, hosted_store: HostedStoreConfig::Fs, backend: BackendConfig::Local, } @@ -663,7 +644,6 @@ impl Config { policies: PackagePolicies::registry_mock_defaults(), auth: AuthConfig::default(), logs: LogConfig::default(), - install_accelerator_grant_ttl: None, hosted_store: HostedStoreConfig::Fs, backend: BackendConfig::Local, } @@ -819,10 +799,6 @@ impl Config { policies, auth, logs, - install_accelerator_grant_ttl: file - .install_accelerator - .and_then(|block| block.grant_ttl) - .map(Duration::from_secs), hosted_store, backend, }) diff --git a/pnpr/crates/pnpr/src/install_accelerator.rs b/pnpr/crates/pnpr/src/install_accelerator.rs deleted file mode 100644 index 955d1e5a3c..0000000000 --- a/pnpr/crates/pnpr/src/install_accelerator.rs +++ /dev/null @@ -1,792 +0,0 @@ -//! pnpr install accelerator: server-side dependency resolution plus file-level -//! store deduplication, exposed as an additive, opt-in protocol -//! alongside pnpr's npm-compatible API. The handshake + endpoints are -//! served under one base URL (the `pnprServer`). -//! -//! Two routes, built on pacquet's resolver and content-addressable -//! store: -//! -//! * `GET /-/pnpr` — capability handshake; advertises the supported -//! protocol versions so a client can negotiate or fail fast. -//! * `POST /v1/install` — resolve a project **against the registries -//! the client sends** (so the server uses the same source of truth as -//! the client), then return, in a single gzipped binary response, the -//! lockfile, stats, pre-packed store-index entries, and the contents of -//! the files the client is missing (a length-prefixed JSON header -//! followed by the binary file frames). One round trip -//! ([pnpm/pnpm#12165](https://github.com/pnpm/pnpm/issues/12165)). -//! -//! 
Files are bound to access ([`authorize_served_packages`]): a -//! content-addressed digest is never a bearer capability. Anonymous -//! content is checked against pnpr's own `packages:` policy; content -//! fetched with the caller's forwarded credentials is gated per user -//! against the owning registry. -//! -//! The client's `registry`, `namedRegistries`, `overrides`, and the -//! verification policy (`minimumReleaseAge`, `trustPolicy`, ...) drive -//! resolution and verification. When the client sends its on-disk -//! lockfile, the server verifies it under the client's policy before -//! resolving, then reuses it as the resolution seed (frozen → as-is; -//! non-frozen → reuse-and-update). A multi-project workspace is resolved -//! by reconstructing the workspace on disk (root manifest + -//! `pnpm-workspace.yaml` + member manifests) and letting pacquet's -//! install path discover and resolve every importer. The client also -//! forwards its per-registry credentials, so private dependencies resolve -//! and fetch as the caller. Responses are buffered rather than truly -//! streamed. 
- -mod diff; -mod grant_table; -mod protocol; -mod public_packages; -mod resolve; -mod verdict_cache; - -#[cfg(test)] -mod tests; - -use std::{ - collections::{HashMap, HashSet}, - io::Write as _, - path::PathBuf, - sync::{Arc, Mutex, OnceLock}, - time::Duration, -}; - -use crate::{ - config::Config as RegistryConfig, - policy::{Identity, PackagePolicies}, -}; - -use axum::{ - body::{Body, Bytes}, - http::{StatusCode, header}, - response::Response, -}; -use base64::{Engine, engine::general_purpose::STANDARD as BASE64}; -use flate2::{Compression, write::GzEncoder}; -use indexmap::IndexMap; -use pacquet_config::Config as PacquetConfig; -use pacquet_lockfile::Lockfile; -use pacquet_lockfile_verification::{collect_resolution_policy_violations, hash_lockfile}; -use pacquet_network::{AuthHeaders, ThrottledClient}; -use pacquet_package_manager::build_resolution_verifiers; -use pacquet_resolving_npm_resolver::{InMemoryPackageMetaCache, PackageMetaCache, to_registry_url}; -use pacquet_resolving_resolver_base::ResolutionVerifier; -use pacquet_store_dir::{StoreDir, StoreIndex}; - -use self::{ - grant_table::GrantTable, protocol::InstallRequest, public_packages::PublicPackages, - verdict_cache::VerdictCache, -}; - -/// Per-server engine backing the pnpr install endpoints: it holds the -/// store, cache, and HTTP client used to resolve a client's project and -/// serve the files its store is missing. The store and cache dirs are -/// fixed for the server's lifetime; the *registries* come from each -/// client request (the server resolves against the client's registries, -/// not its own), so the `&'static Config` the install path requires is -/// interned per distinct client registry configuration rather than -/// leaked once or per request. -/// -/// Held lazily in a [`OnceLock`] on the server's state so servers that -/// never receive such a request pay nothing, and so each server in -/// a multi-server test process keeps its own store. 
-pub(crate) struct InstallAccelerator { - store_dir: StoreDir, - cache_dir: PathBuf, - client: Arc, - /// One leaked `Config` per distinct client registry configuration, - /// keyed by its canonical JSON. Bounds the leak to the number of - /// distinct client setups the server sees (typically one). - configs: Mutex>, - /// SQLite-backed whole-lockfile verification verdict cache. `None` - /// only if the database couldn't be opened — verification then runs - /// every time (uncached) rather than failing the server. - verdict_cache: Option, - /// Per-`(user, name@version)` access grants for externally-resolved - /// private content. `None` if the DB couldn't be opened (every such - /// package then re-verifies uncached). See [`GrantTable`]. - grant_table: Option, - /// Global set of anonymously-readable package names, so a public - /// package isn't gated per user. `None` if the DB couldn't be opened. - /// See [`PublicPackages`]. - public_packages: Option, - /// How long a grant (or public classification) stays valid. `None` - /// (the default) is permanent, leaving revocation to - /// clear-on-discovery; a TTL lets it bite already-seen versions. - grant_ttl: Option, -} - -impl InstallAccelerator { - pub(crate) fn get_or_init<'a>( - cell: &'a OnceLock, - config: &RegistryConfig, - ) -> &'a InstallAccelerator { - cell.get_or_init(|| InstallAccelerator::build(config)) - } - - fn build(config: &RegistryConfig) -> InstallAccelerator { - let store_dir = config.cache_storage.join("pnpr-store"); - let cache_dir = config.cache_storage.join("pnpr-cache"); - // Best-effort: a real failure here (e.g. a permission problem) - // resurfaces with a precise error on the first store/cache write - // during resolution, so there's nothing actionable to report yet. 
- let _ = std::fs::create_dir_all(&store_dir); - let _ = std::fs::create_dir_all(&cache_dir); - let verdict_cache = VerdictCache::open(&cache_dir.join("lockfile-verdicts.sqlite")).ok(); - let grant_table = GrantTable::open(&cache_dir.join("install-grants.sqlite")).ok(); - let public_packages = PublicPackages::open(&cache_dir.join("public-packages.sqlite")).ok(); - InstallAccelerator { - store_dir: StoreDir::new(store_dir), - cache_dir, - client: Arc::new(ThrottledClient::new_for_installs()), - configs: Mutex::new(HashMap::new()), - verdict_cache, - grant_table, - public_packages, - grant_ttl: config.install_accelerator_grant_ttl, - } - } - - /// Resolve (or build + intern) the `&'static Config` for a request's - /// registry configuration. Pacquet's install path resolves against - /// `config.registry` / `named_registries` / `overrides`, so a request - /// from a client with a different registry setup gets its own Config. - fn config_for(&self, request: &InstallRequest) -> &'static PacquetConfig { - let registry = - request.registry.clone().unwrap_or_else(|| "https://registry.npmjs.org/".to_string()); - let registry = if registry.ends_with('/') { registry } else { format!("{registry}/") }; - let overrides: Option> = - request.overrides.as_ref().and_then(|value| serde_json::from_value(value.clone()).ok()); - - let key = serde_json::json!({ - "registry": registry, - "namedRegistries": request.named_registries, - "overrides": overrides, - "minimumReleaseAge": request.minimum_release_age, - "minimumReleaseAgeExclude": request.minimum_release_age_exclude, - "minimumReleaseAgeIgnoreMissingTime": request.minimum_release_age_ignore_missing_time, - "trustPolicy": request.trust_policy, - "trustPolicyExclude": request.trust_policy_exclude, - "trustPolicyIgnoreAfter": request.trust_policy_ignore_after, - }) - .to_string(); - - let mut configs = self.configs.lock().expect("config cache poisoned"); - if let Some(config) = configs.get(&key) { - return config; - } - - let mut 
config = PacquetConfig::new(); - config.store_dir = self.store_dir.clone(); - config.cache_dir = self.cache_dir.clone(); - config.registry = registry; - config.named_registries = request.named_registries.clone(); - config.overrides = overrides; - config.modules_dir = PathBuf::from("node_modules"); - config.lockfile = true; - config.verify_store_integrity = true; - // The client's verification policy drives both the input-lockfile - // verifier and the resolver's pick-time `minimumReleaseAge` / - // `trustPolicy` checks, so newly-resolved entries are held to the - // same policy as the reused ones. - config.minimum_release_age = request.minimum_release_age; - config.minimum_release_age_exclude = request.minimum_release_age_exclude.clone(); - if let Some(ignore_missing_time) = request.minimum_release_age_ignore_missing_time { - config.minimum_release_age_ignore_missing_time = ignore_missing_time; - } - config.trust_policy = request.trust_policy; - config.trust_policy_exclude = request.trust_policy_exclude.clone(); - config.trust_policy_ignore_after = request.trust_policy_ignore_after; - let config: &'static PacquetConfig = config.leak(); - configs.insert(key, config); - config - } -} - -/// Handle `POST /v1/install`. `identity` is the resolved caller; the -/// store's possession of a package's bytes is not a capability to read -/// them, so every served package is authorized first — see -/// [`authorize_served_packages`]. -pub(crate) async fn handle_install( - runtime: &InstallAccelerator, - policies: &PackagePolicies, - identity: Identity, - body: Bytes, -) -> Response { - let request: InstallRequest = match serde_json::from_slice(&body) { - Ok(request) => request, - Err(err) => return json_error(StatusCode::BAD_REQUEST, &err.to_string()), - }; - - // Resolve against the client's registries, not the server's own. 
- let config = runtime.config_for(&request); - - // The caller's forwarded upstream credentials, threaded through - // resolve/verify/fetch but kept out of the interned `config` so it - // never leaks a `&'static Config` per user. - let request_auth = Arc::new(AuthHeaders::from_map( - request.auth_headers.iter().map(|(uri, value)| (uri.clone(), value.clone())).collect(), - )); - - // Verify the *input* lockfile under the client's policy before - // resolving ([pnpm/pnpm#12139](https://github.com/pnpm/pnpm/issues/12139)). - // The client skips its own `verifyLockfileResolutions` whenever a - // pnpr server is configured, so this is the only place the - // committed/reused entries get checked. A true first install sends - // no lockfile — nothing to verify. `trustLockfile` is the client's - // opt-out (mirrors the local path's `--trust-lockfile`). Freshly- - // resolved entries are held to the same policy by the resolver's - // pick-time gate (the policy is wired into `config`). - if !request.trust_lockfile - && let Some(input_lockfile) = request.lockfile.as_ref() - && let Err(failure) = - verify_input_lockfile(runtime, config, &request_auth, input_lockfile).await - { - return match failure { - VerifyFailure::Internal(response) => response, - VerifyFailure::Violations(violations) => violation_response(&violations), - }; - } - - let lockfile = match resolve::resolve(config, &runtime.client, &request, &request_auth).await { - Ok(lockfile) => lockfile, - Err(err) => return json_error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string()), - }; - - let packages = resolve::collect_packages(&lockfile, &config.registry); - - // `pkg_id`s fetched from upstream this request: the registry accepted - // the caller's token for each, so the gate treats them as proven. - let mut freshly_fetched: HashSet = HashSet::new(); - - // `--lockfile-only`: pnpm resolves and writes the lockfile but - // fetches nothing and links nothing. 
Skip the tarball fetch + the - // file-level diff and return just the lockfile; the client writes it - // and stops, so the response carries no `D`/`I` lines. - // See [pnpm/pnpm#12146](https://github.com/pnpm/pnpm/issues/12146). - let result = if request.lockfile_only { - diff::DiffResult { - missing_files: Vec::new(), - package_index: Vec::new(), - stats: diff::Stats { total_packages: packages.len() as u64, ..diff::Stats::default() }, - } - } else { - match resolve::fetch_uncached(config, &runtime.client, &request_auth, &packages).await { - Ok(fetched) => freshly_fetched = fetched, - Err(err) => return json_error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string()), - } - - let store = match StoreIndex::open_readonly_in(&config.store_dir) { - Ok(store) => store, - Err(err) => return json_error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string()), - }; - - let diff_packages: Vec = packages - .iter() - .map(|pkg| diff::ResolvedPackage { - integrity: pkg.integrity.clone(), - pkg_id: pkg.pkg_id.clone(), - }) - .collect(); - - match diff::compute_diff(&store, &diff_packages, &request.store_integrities) { - Ok(result) => result, - Err(err) => return json_error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string()), - } - }; - - if let Some(denied) = authorize_served_packages( - runtime, - policies, - &identity, - &request, - &request_auth, - &freshly_fetched, - &result.package_index, - ) - .await - { - return denied; - } - - let stats_json = stats_json(&result.stats); - inline_response(runtime, &lockfile, &stats_json, &result) -} - -fn stats_json(stats: &diff::Stats) -> serde_json::Value { - serde_json::json!({ - "totalPackages": stats.total_packages, - "alreadyInStore": stats.already_in_store, - "packagesToFetch": stats.packages_to_fetch, - "filesInNewPackages": stats.files_in_new_packages, - "filesAlreadyInCafs": stats.files_already_in_cafs, - "filesToDownload": stats.files_to_download, - "downloadBytes": stats.download_bytes, - }) -} - -/// Authorize every served 
package before its files leave the store (a -/// shared content digest is never a read capability), dispatched by -/// whether a forwarded credential was used to fetch it: such packages are -/// gated per user against the owning registry -/// ([`authorize_upstream_package`]); the rest by pnpr's local `packages:` -/// policy ([`deny_local_policy`]). Returns the first denial, or `None`. -async fn authorize_served_packages( - runtime: &InstallAccelerator, - policies: &PackagePolicies, - identity: &Identity, - request: &InstallRequest, - request_auth: &AuthHeaders, - freshly_fetched: &HashSet, - served: &[diff::PackageIndexEntry], -) -> Option { - // The default registry pnpr resolved against (what `collect_packages` - // / `fetch_uncached` built every tarball URL from). Per-scope external - // registries are a future refinement. - let registry = request.registry.as_deref().unwrap_or("https://registry.npmjs.org/"); - - let mut local_pkg_ids: Vec<&str> = Vec::new(); - for entry in served { - let Some(name) = package_name(&entry.pkg_id) else { continue }; - let pkg_url = to_registry_url(registry, name); - if request_auth.for_url(&pkg_url).is_none() { - local_pkg_ids.push(entry.pkg_id.as_str()); - continue; - } - if let Some(denied) = authorize_upstream_package( - runtime, - identity, - request_auth, - freshly_fetched, - registry, - name, - &entry.pkg_id, - ) - .await - { - return Some(denied); - } - } - - deny_local_policy(policies, identity, local_pkg_ids.into_iter()) -} - -/// Deny when the caller may not read a package gated by pnpr's own -/// `packages:` policy. 401 for anonymous, 403 for an authenticated caller -/// outside the allowed set; `None` when every name is readable. 
-fn deny_local_policy<'a>( - policies: &PackagePolicies, - identity: &Identity, - pkg_ids: impl Iterator, -) -> Option { - let mut checked: HashSet<&str> = HashSet::new(); - for pkg_id in pkg_ids { - let Some(name) = package_name(pkg_id) else { continue }; - if !checked.insert(name) { - continue; - } - if !policies.for_package(name).access.allows(identity) { - let status = match identity { - Identity::Anonymous => StatusCode::UNAUTHORIZED, - Identity::User { .. } => StatusCode::FORBIDDEN, - }; - return Some(json_error(status, &format!("not authorized to access {name:?}"))); - } - } - None -} - -/// Authorize one upstream-as-authority package: the owning registry, not -/// pnpr, decides. Known-public, freshly fetched, or already granted → -/// allow (recording a grant where applicable); otherwise probe the -/// registry anonymously (a `2xx` records it public globally) then -/// re-verify with the caller's token (`2xx` grants, `401`/`403` clears the -/// caller's grants and denies). Grants key on an identified user; the -/// global public set benefits anonymous callers too. See the body's -/// branches and the module tests for each path. -async fn authorize_upstream_package( - runtime: &InstallAccelerator, - identity: &Identity, - request_auth: &AuthHeaders, - freshly_fetched: &HashSet, - registry: &str, - name: &str, - pkg_id: &str, -) -> Option { - // Public content needs no per-user gating, so it never reaches the - // grant table or an upstream round trip once classified. - if let Some(public) = runtime.public_packages.as_ref() - && public.is_public(name, runtime.grant_ttl) - { - return None; - } - - let user = match identity { - Identity::User { username } => Some(username.as_str()), - Identity::Anonymous => None, - }; - let grants = || user.zip(runtime.grant_table.as_ref()); - - // The cold fetch this request already proved access: the upstream - // accepted the caller's forwarded token. 
- if freshly_fetched.contains(pkg_id) { - if let Some((user, table)) = grants() { - table.record(user, pkg_id); - } - return None; - } - - if let Some((user, table)) = grants() - && table.is_granted(user, pkg_id, runtime.grant_ttl) - { - return None; - } - - // Classify before gating per user: a package the registry serves - // anonymously is public — record it globally so no one probes it - // again. Only a token-gated package takes the per-user path below. - if let UpstreamAccess::Authorized = - probe_upstream_access(&runtime.client, None, registry, name).await - { - if let Some(public) = runtime.public_packages.as_ref() { - public.record(name); - } - return None; - } - - match probe_upstream_access(&runtime.client, Some(request_auth), registry, name).await { - UpstreamAccess::Authorized => { - if let Some((user, table)) = grants() { - table.record(user, pkg_id); - } - None - } - UpstreamAccess::Denied => { - if let Some((user, table)) = grants() { - table.clear_package(user, name); - } - Some(json_error(StatusCode::FORBIDDEN, &format!("not authorized to access {name:?}"))) - } - UpstreamAccess::Unknown => Some(json_error( - StatusCode::BAD_GATEWAY, - &format!("could not verify access to {name:?}"), - )), - } -} - -/// Outcome of an upstream access probe. -enum UpstreamAccess { - /// The upstream served the package's packument for the probe. - Authorized, - /// The upstream returned `401`/`403`. - Denied, - /// The upstream was unreachable or returned some other status; access - /// can't be decided. - Unknown, -} - -/// Probe whether `name` is readable from `registry` by fetching its -/// (abbreviated) packument. `auth` set attaches the caller's credential -/// (a re-verify); `auth` `None` is anonymous (a public/private check). 
-async fn probe_upstream_access( - client: &ThrottledClient, - auth: Option<&AuthHeaders>, - registry: &str, - name: &str, -) -> UpstreamAccess { - let url = to_registry_url(registry, name); - let guard = client.acquire_for_url(&url).await; - let mut request = guard.get(&url).header("accept", "application/vnd.npm.install-v1+json"); - if let Some(value) = auth.and_then(|auth| auth.for_url(&url)) { - request = request.header("authorization", value); - } - match request.send().await { - Ok(response) => { - let status = response.status().as_u16(); - if (200..300).contains(&status) { - UpstreamAccess::Authorized - } else if status == 401 || status == 403 { - UpstreamAccess::Denied - } else { - UpstreamAccess::Unknown - } - } - Err(_) => UpstreamAccess::Unknown, - } -} - -/// The package name from a `name@version` package id, tolerating a -/// leading scope `@` (`@scope/foo@1.0.0` → `@scope/foo`). -fn package_name(pkg_id: &str) -> Option<&str> { - let at = pkg_id.rfind('@')?; - (at > 0).then_some(&pkg_id[..at]) -} - -/// gzip level for the install response body. Level 6 (the gzip default) -/// shrinks the payload ~16% over level 1 — the win that matters once the -/// server is across a latency link, where fewer bytes means fewer TCP -/// slow-start round trips — while level 9 adds under a percent for several -/// times the CPU. -const FILES_GZIP_LEVEL: u32 = 6; - -/// Content type of the install response: a length-prefixed JSON header -/// followed by the [`build_files_payload`] binary frames, gzip-compressed. -const INLINE_CONTENT_TYPE: &str = "application/x-pnpr-install-inline"; - -/// Build the single-response body: the lockfile, stats, and store-index -/// entries in a length-prefixed JSON header, followed by the contents of -/// the files the client is missing as binary frames — so the client -/// materializes everything from one round trip. 
-fn inline_response( - runtime: &InstallAccelerator, - lockfile: &Lockfile, - stats_json: &serde_json::Value, - result: &diff::DiffResult, -) -> Response { - let index_entries: Vec = result - .package_index - .iter() - .map(|entry| { - serde_json::json!({ - "key": format!("{}\t{}", entry.integrity, entry.pkg_id), - "b64": BASE64.encode(&entry.raw), - }) - }) - .collect(); - let header = serde_json::json!({ - "lockfile": serde_json::to_value(lockfile).unwrap_or(serde_json::Value::Null), - "stats": stats_json, - "indexEntries": index_entries, - }); - - let files = result.missing_files.iter().map(|file| (file.digest.as_str(), file.executable)); - let files_payload = match build_files_payload(&runtime.store_dir, files) { - Ok(payload) => payload, - Err((status, message)) => return json_error(status, &message), - }; - - finish_inline_response(&header, &files_payload) -} - -/// Frame a JSON `header` and an already-built [`build_files_payload`] -/// byte buffer into one length-prefixed, gzip-compressed body. 
-fn finish_inline_response(header: &serde_json::Value, files_payload: &[u8]) -> Response { - let header_bytes = serde_json::to_vec(header).unwrap_or_else(|_| b"{}".to_vec()); - let Ok(header_len) = u32::try_from(header_bytes.len()) else { - return json_error(StatusCode::INTERNAL_SERVER_ERROR, "install header too large"); - }; - let mut body = Vec::with_capacity(4 + header_bytes.len() + files_payload.len()); - body.extend_from_slice(&header_len.to_be_bytes()); - body.extend_from_slice(&header_bytes); - body.extend_from_slice(files_payload); - - let mut encoder = GzEncoder::new(Vec::new(), Compression::new(FILES_GZIP_LEVEL)); - if encoder.write_all(&body).is_err() { - return json_error(StatusCode::INTERNAL_SERVER_ERROR, "gzip failed"); - } - let gzipped = match encoder.finish() { - Ok(gzipped) => gzipped, - Err(_) => return json_error(StatusCode::INTERNAL_SERVER_ERROR, "gzip failed"), - }; - - Response::builder() - .status(StatusCode::OK) - .header(header::CONTENT_TYPE, INLINE_CONTENT_TYPE) - .header(header::CONTENT_ENCODING, "gzip") - .body(Body::from(gzipped)) - .expect("binary response is always valid") -} - -/// Why [`verify_input_lockfile`] failed: either the lockfile violated -/// the client's policy (carry the rendered violations so the caller can -/// shape them for the client's protocol) or the verifiers couldn't be -/// built at all (a ready-made error response). -enum VerifyFailure { - Violations(Vec), - Internal(Response), -} - -/// Verify the client's input lockfile under the client's policy. On a -/// clean pass returns `Ok(())`; on a policy violation returns the -/// rendered violations so the caller can deliver them in whichever -/// protocol the client asked for (NDJSON `E` line or inline header). A -/// build-verifiers failure (e.g. an invalid exclude pattern) returns a -/// ready-made 500. 
-async fn verify_input_lockfile( - runtime: &InstallAccelerator, - config: &'static PacquetConfig, - auth_headers: &Arc, - lockfile: &Lockfile, -) -> Result<(), VerifyFailure> { - // A fresh per-request packument cache shared with the verifier; the - // on-disk metadata mirror under `/v11/metadata-full` is - // warm across requests and is the real verification cache. - let meta_cache = Arc::new(InMemoryPackageMetaCache::default()); - let verifiers = build_resolution_verifiers( - config, - Arc::clone(&runtime.client), - Some(meta_cache as Arc), - Some(Arc::clone(auth_headers)), - ) - .map_err(|err| { - VerifyFailure::Internal(json_error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string())) - })?; - - // Whole-lockfile verdict cache: an O(1) hit when this exact lockfile - // already passed under a policy we still trust skips the whole fan-out - // (the dominant win for a shared pnpr — CI re-runs, a fleet building - // the same repo). - let hash = hash_lockfile(lockfile); - if let Some(cache) = runtime.verdict_cache.as_ref() - && cache.is_verified(&hash, |policy| { - verifiers.iter().all(|verifier| verifier.can_trust_past_check(policy)) - }) - { - return Ok(()); - } - - let violations = collect_resolution_policy_violations(lockfile, &verifiers, None).await; - if violations.is_empty() { - if let Some(cache) = runtime.verdict_cache.as_ref() { - cache.record(&hash, &merge_policies(&verifiers)); - } - return Ok(()); - } - - let rendered: Vec = violations - .iter() - .map(|violation| { - serde_json::json!({ - "name": violation.name.to_string(), - "version": violation.version, - "code": violation.code, - "reason": violation.reason, - }) - }) - .collect(); - Err(VerifyFailure::Violations(rendered)) -} - -/// Render input-lockfile policy violations into the inline response -/// header (`{ "violations": [...] }`, no files following) so the client -/// rebuilds the identical `VerifyError` and aborts the same way the local -/// gate would. 
-fn violation_response(violations: &[serde_json::Value]) -> Response { - let header = serde_json::json!({ "violations": violations }); - // No files follow a verification failure: just the end-of-stream - // marker so the client's frame parser terminates cleanly. - let files_payload = empty_files_payload(); - finish_inline_response(&header, &files_payload) -} - -/// Merge every active verifier's policy snapshot into one bag, the key -/// the verdict cache stores alongside the lockfile hash. Later verifiers -/// overwrite earlier ones on a shared key — mirrors the local cache's -/// `merge_policies` so a verdict recorded here is comparable to one the -/// client's own cache would write. -fn merge_policies( - verifiers: &[Arc], -) -> serde_json::Map { - let mut merged = serde_json::Map::new(); - for verifier in verifiers { - for (key, value) in verifier.policy() { - merged.insert(key.clone(), value.clone()); - } - } - merged -} - -/// The binary file frames the install response embeds: a 2-byte `{}` JSON -/// header (length-prefixed) followed by one -/// `[64-byte digest][u32 size][1-byte exec][content]` frame per file, -/// terminated by 64 zero bytes. Reads each file's content from the store -/// by digest; an `Err` is a ready-made error response. 
-fn build_files_payload<'a>( - store_dir: &StoreDir, - files: impl Iterator, -) -> Result, (StatusCode, String)> { - let mut payload = empty_files_payload_prefix(); - for (digest, executable) in files { - let mode = if executable { 0o755 } else { 0o644 }; - let Some(path) = store_dir.cas_file_path_by_mode(digest, mode) else { - return Err(( - StatusCode::INTERNAL_SERVER_ERROR, - "could not resolve file path".to_string(), - )); - }; - let content = match std::fs::read(&path) { - Ok(content) => content, - Err(err) => { - return Err((StatusCode::INTERNAL_SERVER_ERROR, format!("{digest}: {err}"))); - } - }; - let Some(digest_bytes) = hex_to_bytes(digest) else { - return Err((StatusCode::BAD_REQUEST, "invalid digest".to_string())); - }; - // The wire framing encodes the size as a u32; a >4 GiB file would - // truncate. npm files never approach this, but fail cleanly rather - // than corrupt the stream. - let Ok(content_len) = u32::try_from(content.len()) else { - return Err(( - StatusCode::INTERNAL_SERVER_ERROR, - format!("{digest}: file too large for the protocol"), - )); - }; - payload.extend_from_slice(&digest_bytes); - payload.extend_from_slice(&content_len.to_be_bytes()); - payload.push(u8::from(executable)); - payload.extend_from_slice(&content); - } - payload.extend_from_slice(&[0u8; 64]); - Ok(payload) -} - -/// The leading 2-byte `{}` JSON header every files payload starts with. -fn empty_files_payload_prefix() -> Vec { - let mut prefix = Vec::new(); - prefix.extend_from_slice(&2u32.to_be_bytes()); - prefix.extend_from_slice(b"{}"); - prefix -} - -/// A files payload carrying no files — the header prefix plus the -/// end-of-stream marker. Used when an `inlineFiles` response has only -/// metadata (a `--lockfile-only` resolve or a verification failure). 
-fn empty_files_payload() -> Vec { - let mut payload = empty_files_payload_prefix(); - payload.extend_from_slice(&[0u8; 64]); - payload -} - -fn json_error(status: StatusCode, message: &str) -> Response { - let body = serde_json::json!({ "error": message }).to_string(); - Response::builder() - .status(status) - .header(header::CONTENT_TYPE, "application/json") - .body(Body::from(body)) - .expect("static json error response is always valid") -} - -/// Decode a 64-byte (128 hex char) digest into raw bytes. Returns -/// `None` on a malformed length or non-hex byte. -fn hex_to_bytes(hex: &str) -> Option<[u8; 64]> { - if hex.len() != 128 { - return None; - } - let bytes = hex.as_bytes(); - let mut out = [0u8; 64]; - for (i, slot) in out.iter_mut().enumerate() { - *slot = (hex_val(bytes[2 * i])? << 4) | hex_val(bytes[2 * i + 1])?; - } - Some(out) -} - -fn hex_val(byte: u8) -> Option { - match byte { - b'0'..=b'9' => Some(byte - b'0'), - b'a'..=b'f' => Some(byte - b'a' + 10), - _ => None, - } -} diff --git a/pnpr/crates/pnpr/src/install_accelerator/diff.rs b/pnpr/crates/pnpr/src/install_accelerator/diff.rs deleted file mode 100644 index d45fc3b8ac..0000000000 --- a/pnpr/crates/pnpr/src/install_accelerator/diff.rs +++ /dev/null @@ -1,157 +0,0 @@ -//! File-level diff between a resolved dependency tree and what the -//! client already has in its content-addressable store. -//! -//! Given a resolved lockfile, the client's store integrities, and the -//! server's store index, it computes which individual files the client -//! is missing — -//! deduplicated by `(digest, executable)` — plus the per-package -//! msgpack index entries the client needs to write into its own store -//! index before a headless install. - -use std::collections::{HashMap, HashSet}; - -use pacquet_store_dir::{ - PackageFilesIndex, StoreIndex, StoreIndexError, encode_package_files_index, -}; - -/// One resolved package the diff considers, distilled from the -/// lockfile's `packages` map. 
-pub struct ResolvedPackage { - /// SRI integrity string, e.g. `sha512-...`. - pub integrity: String, - /// Package id without the peer-deps suffix, e.g. `foo@1.0.0`. - pub pkg_id: String, -} - -/// A file the client's store is missing. -pub struct MissingFile { - /// Lowercase sha512 hex digest (no `sha512-` prefix). - pub digest: String, - pub executable: bool, -} - -/// Pre-packed store-index entry forwarded to the client (`I` line). -pub struct PackageIndexEntry { - pub integrity: String, - pub pkg_id: String, - /// msgpackr-records bytes ready for the client's `StoreIndex.setRawMany`. - pub raw: Vec, -} - -#[derive(Default)] -pub struct Stats { - pub total_packages: u64, - pub already_in_store: u64, - pub packages_to_fetch: u64, - pub files_in_new_packages: u64, - pub files_already_in_cafs: u64, - pub files_to_download: u64, - pub download_bytes: u64, -} - -pub struct DiffResult { - pub missing_files: Vec, - pub package_index: Vec, - pub stats: Stats, -} - -struct IntegrityEntry { - decoded: PackageFilesIndex, - /// Re-encoded msgpackr-records buffer for the client. - raw: Vec, -} - -fn is_executable(mode: u32) -> bool { - mode & 0o111 != 0 -} - -/// Build a map from SRI integrity to its decoded files index and a -/// re-encoded msgpackr-records buffer, restricted to the integrities -/// the diff actually needs (the client's existing packages plus the -/// newly resolved ones). Re-encoding guarantees the buffer is in the -/// msgpackr-records shape the pnpm client's store index reads, no -/// matter whether pacquet wrote the row as plain msgpack. -/// -/// The server's store index keys are `{integrity}\t{pkgId}`; we key by -/// the integrity half and keep the first occurrence, matching -/// `buildIntegrityIndex` in the TypeScript proof of concept. -fn build_integrity_index( - store: &StoreIndex, - needed: &HashSet, -) -> Result, StoreIndexError> { - let mut index = HashMap::new(); - for key in store.keys()? 
{ - let Some((integrity, _pkg_id)) = key.split_once('\t') else { continue }; - if !needed.contains(integrity) || index.contains_key(integrity) { - continue; - } - let Some(decoded) = store.get(&key)? else { continue }; - let Ok(raw) = encode_package_files_index(&decoded) else { continue }; - index.insert(integrity.to_string(), IntegrityEntry { decoded, raw }); - } - Ok(index) -} - -/// Compute the file-level diff. Mirrors `computeDiff` in the -/// TypeScript proof of concept: union the client's existing file digests, then -/// for every resolved package not already in the client's store emit -/// the files it doesn't yet have (deduped across the whole response). -pub fn compute_diff( - store: &StoreIndex, - packages: &[ResolvedPackage], - store_integrities: &[String], -) -> Result { - let mut needed: HashSet = store_integrities.iter().cloned().collect(); - for pkg in packages { - needed.insert(pkg.integrity.clone()); - } - let index = build_integrity_index(store, &needed)?; - - let client_integrities: HashSet<&str> = store_integrities.iter().map(String::as_str).collect(); - - // Digests (and their exec flag) the client already has on disk. 
- let mut client_digests: HashSet<(String, bool)> = HashSet::new(); - for integrity in store_integrities { - let Some(entry) = index.get(integrity) else { continue }; - for file in entry.decoded.files.values() { - client_digests.insert((file.digest.clone(), is_executable(file.mode))); - } - } - - let mut stats = Stats::default(); - let mut missing_files = Vec::new(); - let mut package_index = Vec::new(); - - for pkg in packages { - stats.total_packages += 1; - - if client_integrities.contains(pkg.integrity.as_str()) { - stats.already_in_store += 1; - continue; - } - - let Some(entry) = index.get(&pkg.integrity) else { continue }; - stats.packages_to_fetch += 1; - - for file in entry.decoded.files.values() { - stats.files_in_new_packages += 1; - let executable = is_executable(file.mode); - let key = (file.digest.clone(), executable); - if client_digests.insert(key) { - stats.files_to_download += 1; - stats.download_bytes += file.size; - missing_files.push(MissingFile { digest: file.digest.clone(), executable }); - } else { - stats.files_already_in_cafs += 1; - } - } - - package_index.push(PackageIndexEntry { - integrity: pkg.integrity.clone(), - pkg_id: pkg.pkg_id.clone(), - raw: entry.raw.clone(), - }); - } - - Ok(DiffResult { missing_files, package_index, stats }) -} diff --git a/pnpr/crates/pnpr/src/install_accelerator/grant_table.rs b/pnpr/crates/pnpr/src/install_accelerator/grant_table.rs deleted file mode 100644 index a347669677..0000000000 --- a/pnpr/crates/pnpr/src/install_accelerator/grant_table.rs +++ /dev/null @@ -1,108 +0,0 @@ -//! Per-`(user, name@version)` allow-list gating externally-resolved -//! private content ([pnpm/pnpm#12184](https://github.com/pnpm/pnpm/issues/12184)): -//! the store dedups the bytes globally, but possession must not authorize -//! a user the owning registry never cleared. Backed by SQLite (WAL) like -//! [`super::verdict_cache::VerdictCache`]; every method is best-effort (a -//! 
DB error never fails the request, at worst one extra re-verify). - -use std::{ - path::Path, - sync::Mutex, - time::{Duration, SystemTime, UNIX_EPOCH}, -}; - -use rusqlite::Connection; - -/// Soft cap on stored grants; the oldest rows (by `granted_at_ms`) are -/// evicted past this. -const MAX_ROWS: i64 = 100_000; - -/// Concurrency-safe store of per-`(user, name@version)` access grants. -pub(crate) struct GrantTable { - conn: Mutex, -} - -impl GrantTable { - /// Open (creating if needed) the grant database at `path`. - pub(crate) fn open(path: &Path) -> rusqlite::Result { - let conn = Connection::open(path)?; - conn.busy_timeout(Duration::from_secs(5))?; - conn.execute_batch( - "PRAGMA journal_mode=WAL; - CREATE TABLE IF NOT EXISTS grants ( - user TEXT NOT NULL, - pkg TEXT NOT NULL, - granted_at_ms INTEGER NOT NULL, - PRIMARY KEY (user, pkg) - );", - )?; - Ok(Self { conn: Mutex::new(conn) }) - } - - /// Whether `(user, pkg)` holds a grant still within `ttl` (`None` = - /// permanent). `pkg` is the `name@version` package id. - pub(crate) fn is_granted(&self, user: &str, pkg: &str, ttl: Option) -> bool { - let conn = self.conn.lock().expect("grant table poisoned"); - let granted_at: Option = conn - .query_row( - "SELECT granted_at_ms FROM grants WHERE user = ?1 AND pkg = ?2", - rusqlite::params![user, pkg], - |row| row.get(0), - ) - .ok(); - let Some(granted_at) = granted_at else { - return false; - }; - match ttl { - None => true, - Some(ttl) => now_ms().saturating_sub(granted_at) <= ttl.as_millis() as i64, - } - } - - /// Record (or refresh) a grant for `(user, pkg)`. Best-effort. 
- pub(crate) fn record(&self, user: &str, pkg: &str) { - let now = now_ms(); - let conn = self.conn.lock().expect("grant table poisoned"); - let _ = conn.execute( - "INSERT INTO grants (user, pkg, granted_at_ms) VALUES (?1, ?2, ?3) - ON CONFLICT(user, pkg) DO UPDATE SET granted_at_ms = excluded.granted_at_ms", - rusqlite::params![user, pkg, now], - ); - evict_overflow(&conn); - } - - /// Clear-on-discovery: drop every grant `user` holds for `name`, - /// across all versions (matched by the `name@` prefix, since `pkg` is - /// `name@version`). Best-effort. - pub(crate) fn clear_package(&self, user: &str, name: &str) { - let with_at = format!("{name}@"); - let prefix_len = with_at.chars().count() as i64; - let conn = self.conn.lock().expect("grant table poisoned"); - let _ = conn.execute( - "DELETE FROM grants WHERE user = ?1 AND substr(pkg, 1, ?2) = ?3", - rusqlite::params![user, prefix_len, with_at], - ); - } -} - -/// Trim the oldest rows past [`MAX_ROWS`], ordered by `granted_at_ms`. -fn evict_overflow(conn: &Connection) { - let _ = conn.execute( - "DELETE FROM grants WHERE rowid IN ( - SELECT rowid FROM grants - ORDER BY granted_at_ms DESC - LIMIT -1 OFFSET ?1 - )", - rusqlite::params![MAX_ROWS], - ); -} - -fn now_ms() -> i64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|elapsed| elapsed.as_millis() as i64) - .unwrap_or(0) -} - -#[cfg(test)] -mod tests; diff --git a/pnpr/crates/pnpr/src/install_accelerator/grant_table/tests.rs b/pnpr/crates/pnpr/src/install_accelerator/grant_table/tests.rs deleted file mode 100644 index f04734abed..0000000000 --- a/pnpr/crates/pnpr/src/install_accelerator/grant_table/tests.rs +++ /dev/null @@ -1,74 +0,0 @@ -use std::{thread::sleep, time::Duration}; - -use tempfile::TempDir; - -use super::GrantTable; - -fn open() -> (GrantTable, TempDir) { - let dir = TempDir::new().expect("tempdir"); - let table = GrantTable::open(&dir.path().join("grants.sqlite")).expect("open grant table"); - (table, dir) -} - -#[test] -fn 
records_and_reads_a_grant() { - let (table, _dir) = open(); - assert!(!table.is_granted("alice", "@acme/foo@1.0.0", None)); - table.record("alice", "@acme/foo@1.0.0"); - assert!(table.is_granted("alice", "@acme/foo@1.0.0", None)); - // A grant is per-user and per-version. - assert!(!table.is_granted("bob", "@acme/foo@1.0.0", None)); - assert!(!table.is_granted("alice", "@acme/foo@2.0.0", None)); -} - -#[test] -fn clear_package_drops_every_version_for_that_user_only() { - let (table, _dir) = open(); - table.record("alice", "@acme/foo@1.0.0"); - table.record("alice", "@acme/foo@2.0.0"); - table.record("alice", "@acme/bar@1.0.0"); - table.record("bob", "@acme/foo@1.0.0"); - - table.clear_package("alice", "@acme/foo"); - - assert!(!table.is_granted("alice", "@acme/foo@1.0.0", None)); - assert!(!table.is_granted("alice", "@acme/foo@2.0.0", None)); - // A different package the same user holds is untouched. - assert!(table.is_granted("alice", "@acme/bar@1.0.0", None)); - // Another user's grant for the same package is untouched. - assert!(table.is_granted("bob", "@acme/foo@1.0.0", None)); -} - -#[test] -fn clear_package_does_not_prefix_match_a_sibling_name() { - let (table, _dir) = open(); - // `foo` must not clear `foo-bar` — the `@`-delimited prefix guards it. - table.record("alice", "foo@1.0.0"); - table.record("alice", "foo-bar@1.0.0"); - table.clear_package("alice", "foo"); - assert!(!table.is_granted("alice", "foo@1.0.0", None)); - assert!(table.is_granted("alice", "foo-bar@1.0.0", None)); -} - -#[test] -fn a_ttl_expires_an_old_grant() { - let (table, _dir) = open(); - table.record("alice", "foo@1.0.0"); - // Still valid under a generous TTL. - assert!(table.is_granted("alice", "foo@1.0.0", Some(Duration::from_secs(60)))); - // Expired under a zero TTL once any time has passed. 
- sleep(Duration::from_millis(5)); - assert!(!table.is_granted("alice", "foo@1.0.0", Some(Duration::from_millis(1)))); -} - -#[test] -fn grants_persist_across_reopen() { - let dir = TempDir::new().expect("tempdir"); - let path = dir.path().join("grants.sqlite"); - { - let table = GrantTable::open(&path).expect("open"); - table.record("alice", "foo@1.0.0"); - } - let reopened = GrantTable::open(&path).expect("reopen"); - assert!(reopened.is_granted("alice", "foo@1.0.0", None)); -} diff --git a/pnpr/crates/pnpr/src/install_accelerator/public_packages.rs b/pnpr/crates/pnpr/src/install_accelerator/public_packages.rs deleted file mode 100644 index 47611e8366..0000000000 --- a/pnpr/crates/pnpr/src/install_accelerator/public_packages.rs +++ /dev/null @@ -1,95 +0,0 @@ -//! Global set of **anonymously-readable** package names, so the per-user -//! grant table never gates a public package -//! ([pnpm/pnpm#12184](https://github.com/pnpm/pnpm/issues/12184)). A -//! forwarded token matching a registry only means pnpr fetched a package -//! with it, not that the package is private; in a mixed proxy that would -//! gate public content per user too. Populated lazily by one anonymous -//! probe per name, so a public package costs one round trip fleet-wide. -//! SQLite (WAL) like [`super::grant_table::GrantTable`]; best-effort. - -use std::{ - path::Path, - sync::Mutex, - time::{Duration, SystemTime, UNIX_EPOCH}, -}; - -use rusqlite::Connection; - -/// Soft cap on classified names; the oldest rows (by `classified_at_ms`) -/// are evicted past this. -const MAX_ROWS: i64 = 100_000; - -/// Concurrency-safe set of anonymously-readable package names. -pub(crate) struct PublicPackages { - conn: Mutex, -} - -impl PublicPackages { - /// Open (creating if needed) the classification database at `path`. 
- pub(crate) fn open(path: &Path) -> rusqlite::Result { - let conn = Connection::open(path)?; - conn.busy_timeout(Duration::from_secs(5))?; - conn.execute_batch( - "PRAGMA journal_mode=WAL; - CREATE TABLE IF NOT EXISTS public_packages ( - name TEXT PRIMARY KEY, - classified_at_ms INTEGER NOT NULL - );", - )?; - Ok(Self { conn: Mutex::new(conn) }) - } - - /// Whether `name` was classified anonymously-readable within `ttl` - /// (`None` = permanent). Keyed by name (readability is per-name). - pub(crate) fn is_public(&self, name: &str, ttl: Option) -> bool { - let conn = self.conn.lock().expect("public packages poisoned"); - let classified_at: Option = conn - .query_row( - "SELECT classified_at_ms FROM public_packages WHERE name = ?1", - rusqlite::params![name], - |row| row.get(0), - ) - .ok(); - let Some(classified_at) = classified_at else { - return false; - }; - match ttl { - None => true, - Some(ttl) => now_ms().saturating_sub(classified_at) <= ttl.as_millis() as i64, - } - } - - /// Record (or refresh) `name` as anonymously readable. Best-effort. - pub(crate) fn record(&self, name: &str) { - let now = now_ms(); - let conn = self.conn.lock().expect("public packages poisoned"); - let _ = conn.execute( - "INSERT INTO public_packages (name, classified_at_ms) VALUES (?1, ?2) - ON CONFLICT(name) DO UPDATE SET classified_at_ms = excluded.classified_at_ms", - rusqlite::params![name, now], - ); - evict_overflow(&conn); - } -} - -/// Trim the oldest rows past [`MAX_ROWS`], ordered by `classified_at_ms`. 
-fn evict_overflow(conn: &Connection) { - let _ = conn.execute( - "DELETE FROM public_packages WHERE name IN ( - SELECT name FROM public_packages - ORDER BY classified_at_ms DESC - LIMIT -1 OFFSET ?1 - )", - rusqlite::params![MAX_ROWS], - ); -} - -fn now_ms() -> i64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|elapsed| elapsed.as_millis() as i64) - .unwrap_or(0) -} - -#[cfg(test)] -mod tests; diff --git a/pnpr/crates/pnpr/src/install_accelerator/public_packages/tests.rs b/pnpr/crates/pnpr/src/install_accelerator/public_packages/tests.rs deleted file mode 100644 index ad3e2b1cbf..0000000000 --- a/pnpr/crates/pnpr/src/install_accelerator/public_packages/tests.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::{thread::sleep, time::Duration}; - -use tempfile::TempDir; - -use super::PublicPackages; - -fn open() -> (PublicPackages, TempDir) { - let dir = TempDir::new().expect("tempdir"); - let table = PublicPackages::open(&dir.path().join("public.sqlite")).expect("open"); - (table, dir) -} - -#[test] -fn records_and_reads_a_classification() { - let (table, _dir) = open(); - assert!(!table.is_public("lodash", None)); - table.record("lodash"); - assert!(table.is_public("lodash", None)); - // Classification is per name, not per other name. 
- assert!(!table.is_public("react", None)); -} - -#[test] -fn a_ttl_expires_an_old_classification() { - let (table, _dir) = open(); - table.record("lodash"); - assert!(table.is_public("lodash", Some(Duration::from_secs(60)))); - sleep(Duration::from_millis(5)); - assert!(!table.is_public("lodash", Some(Duration::from_millis(1)))); -} - -#[test] -fn classifications_persist_across_reopen() { - let dir = TempDir::new().expect("tempdir"); - let path = dir.path().join("public.sqlite"); - { - let table = PublicPackages::open(&path).expect("open"); - table.record("lodash"); - } - let reopened = PublicPackages::open(&path).expect("reopen"); - assert!(reopened.is_public("lodash", None)); -} diff --git a/pnpr/crates/pnpr/src/install_accelerator/tests.rs b/pnpr/crates/pnpr/src/install_accelerator/tests.rs deleted file mode 100644 index 64763d459c..0000000000 --- a/pnpr/crates/pnpr/src/install_accelerator/tests.rs +++ /dev/null @@ -1,354 +0,0 @@ -//! Tests for the pnpr-as-authority access gate the install accelerator -//! applies before serving a package's files: a digest in the store is not -//! a bearer capability, so [`deny_local_policy`] checks every locally- -//! authoritative package against pnpr's own `packages:` policy. (The -//! upstream-as-authority regime — forwarded-credential content gated per -//! user — is exercised end to end in the pnpr-client integration tests.) - -use std::collections::HashSet; - -use axum::http::StatusCode; -use pacquet_network::AuthHeaders; -use tempfile::TempDir; - -use super::{ - InstallAccelerator, authorize_served_packages, authorize_upstream_package, deny_local_policy, - diff::PackageIndexEntry, protocol::InstallRequest, -}; -use crate::policy::{AccessList, Identity, PackagePolicies, PackagePolicy}; - -/// The `name@1.0.0` package id a served entry would carry. -fn served(name: &str) -> String { - format!("{name}@1.0.0") -} - -/// Run the local-policy gate over a single served package id. 
-fn deny( - policies: &PackagePolicies, - identity: &Identity, - pkg_id: &str, -) -> Option { - deny_local_policy(policies, identity, std::iter::once(pkg_id)) -} - -fn anonymous() -> Identity { - Identity::Anonymous -} - -fn user() -> Identity { - Identity::User { username: "alice".to_string() } -} - -/// `registry_mock_defaults` gates `@private/*` to `$authenticated`. -fn policies() -> PackagePolicies { - PackagePolicies::registry_mock_defaults() -} - -/// `@team/*` is restricted to the single user `alice`, so an authenticated -/// caller who isn't `alice` is forbidden rather than merely unauthenticated. -fn team_owned_by_alice() -> PackagePolicies { - let team = - PackagePolicy::new("@team/*", AccessList::parse("alice"), AccessList::parse("alice")) - .expect("pattern compiles"); - let rest = - PackagePolicy::new("**", AccessList::parse("$all"), AccessList::parse("$authenticated")) - .expect("pattern compiles"); - PackagePolicies::new(vec![team, rest]) -} - -#[test] -fn anonymous_caller_is_denied_a_private_package() { - let denied = deny(&policies(), &anonymous(), &served("@private/foo")); - assert_eq!(denied.map(|response| response.status()), Some(StatusCode::UNAUTHORIZED)); -} - -#[test] -fn authenticated_caller_is_allowed_a_private_package() { - let denied = deny(&policies(), &user(), &served("@private/foo")); - assert!(denied.is_none()); -} - -#[test] -fn anonymous_caller_is_allowed_a_public_package() { - let denied = deny(&policies(), &anonymous(), &served("is-positive")); - assert!(denied.is_none()); -} - -#[test] -fn authenticated_caller_outside_the_allowed_set_is_forbidden() { - let bob = Identity::User { username: "bob".to_string() }; - let denied = deny(&team_owned_by_alice(), &bob, &served("@team/foo")); - assert_eq!(denied.map(|response| response.status()), Some(StatusCode::FORBIDDEN)); -} - -#[test] -fn authenticated_caller_in_the_allowed_set_is_allowed() { - let denied = deny(&team_owned_by_alice(), &user(), &served("@team/foo")); - 
assert!(denied.is_none()); -} - -// -------------------------------------------------------------------- -// Upstream-as-authority regime: forwarded-credential content gated per -// user against the owning external registry, plus the grant table. -// -------------------------------------------------------------------- - -/// Build a real [`InstallAccelerator`] (store/cache dirs + grant table) -/// under `storage` for the dispatch tests. -fn accelerator(storage: &std::path::Path) -> InstallAccelerator { - let addr = "127.0.0.1:4873".parse().expect("addr parses"); - let config = crate::config::Config::proxy(addr, storage.to_path_buf()); - InstallAccelerator::build(&config) -} - -/// An [`AuthHeaders`] carrying a single default-registry credential for -/// `registry`, mirroring how a client forwards one upstream token. -fn auth_for(registry: &str, header: &str) -> AuthHeaders { - AuthHeaders::from_creds_map([(String::new(), header.to_string())], Some(registry)) -} - -fn entry(pkg_id: &str) -> PackageIndexEntry { - PackageIndexEntry { - integrity: "sha512-x".to_string(), - pkg_id: pkg_id.to_string(), - raw: Vec::new(), - } -} - -fn is_granted(acc: &InstallAccelerator, user: &str, pkg: &str) -> bool { - acc.grant_table.as_ref().expect("grant table opened").is_granted(user, pkg, None) -} - -fn is_public(acc: &InstallAccelerator, name: &str) -> bool { - acc.public_packages.as_ref().expect("public set opened").is_public(name, None) -} - -fn fresh(pkg_ids: &[&str]) -> HashSet { - pkg_ids.iter().map(|id| id.to_string()).collect() -} - -#[tokio::test] -async fn a_fresh_upstream_fetch_is_allowed_and_records_a_grant() { - let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - let auth = auth_for("https://reg.test/", "Bearer t"); - let identity = Identity::User { username: "alice".to_string() }; - - let denied = authorize_upstream_package( - &acc, - &identity, - &auth, - &fresh(&["foo@1.0.0"]), - "https://reg.test/", - "foo", - "foo@1.0.0", - ) - .await; - - 
assert!(denied.is_none()); - assert!(is_granted(&acc, "alice", "foo@1.0.0")); -} - -#[tokio::test] -async fn a_granted_cache_hit_is_served_without_touching_the_upstream() { - let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - acc.grant_table.as_ref().unwrap().record("alice", "foo@1.0.0"); - let auth = auth_for("https://reg.test/", "Bearer t"); - let identity = Identity::User { username: "alice".to_string() }; - - // An unreachable registry: a network probe would resolve to a 502 - // denial, so a pass here proves the grant short-circuited it. - let denied = authorize_upstream_package( - &acc, - &identity, - &auth, - &fresh(&[]), - "http://127.0.0.1:1/", - "foo", - "foo@1.0.0", - ) - .await; - - assert!(denied.is_none()); -} - -#[tokio::test] -async fn an_ungranted_private_cache_hit_reverifies_then_records() { - let mut server = mockito::Server::new_async().await; - // Private: the registry withholds the packument anonymously, then - // serves it once the caller's credential is attached. The two mocks - // are mutually exclusive on the `authorization` header. - let anon = server - .mock("GET", "/foo") - .match_header("authorization", mockito::Matcher::Missing) - .with_status(401) - .create_async() - .await; - let authed = server - .mock("GET", "/foo") - .match_header("authorization", "Bearer t") - .with_status(200) - .with_body("{}") - .create_async() - .await; - let registry = format!("{}/", server.url()); - - let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - let auth = auth_for(®istry, "Bearer t"); - let identity = Identity::User { username: "alice".to_string() }; - - let denied = authorize_upstream_package( - &acc, - &identity, - &auth, - &fresh(&[]), - ®istry, - "foo", - "foo@1.0.0", - ) - .await; - - assert!(denied.is_none()); - anon.assert_async().await; - authed.assert_async().await; - assert!(is_granted(&acc, "alice", "foo@1.0.0")); - // A private package must never be cached as public. 
- assert!(!is_public(&acc, "foo")); -} - -#[tokio::test] -async fn a_public_cache_hit_is_classified_once_then_served_for_free() { - let mut server = mockito::Server::new_async().await; - // Public: the registry serves the packument anonymously. Exactly one - // probe is expected across both authorize calls — the second is served - // from the global classification with no upstream contact. - let mock = - server.mock("GET", "/foo").with_status(200).with_body("{}").expect(1).create_async().await; - let registry = format!("{}/", server.url()); - - let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - let auth = auth_for(®istry, "Bearer t"); - let alice = Identity::User { username: "alice".to_string() }; - - let first = - authorize_upstream_package(&acc, &alice, &auth, &fresh(&[]), ®istry, "foo", "foo@1.0.0") - .await; - assert!(first.is_none()); - assert!(is_public(&acc, "foo")); - // Public content records no per-user grant. - assert!(!is_granted(&acc, "alice", "foo@1.0.0")); - - // A different caller wanting a different cached version is served - // straight from the classification — no second probe. - let bob = Identity::User { username: "bob".to_string() }; - let second = - authorize_upstream_package(&acc, &bob, &auth, &fresh(&[]), ®istry, "foo", "foo@2.0.0") - .await; - assert!(second.is_none()); - - mock.assert_async().await; -} - -#[tokio::test] -async fn a_denied_reverify_clears_the_users_grants_and_denies() { - let mut server = mockito::Server::new_async().await; - let _mock = server.mock("GET", "/foo").with_status(403).create_async().await; - let registry = format!("{}/", server.url()); - - let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - // A standing grant for another version the caller already held: a - // discovered `403` for the package must purge it (clear-on-discovery). 
- acc.grant_table.as_ref().unwrap().record("alice", "foo@2.0.0"); - let auth = auth_for(®istry, "Bearer t"); - let identity = Identity::User { username: "alice".to_string() }; - - let denied = authorize_upstream_package( - &acc, - &identity, - &auth, - &fresh(&[]), - ®istry, - "foo", - "foo@1.0.0", - ) - .await; - - assert_eq!(denied.map(|response| response.status()), Some(StatusCode::FORBIDDEN)); - assert!(!is_granted(&acc, "alice", "foo@2.0.0")); -} - -#[tokio::test] -async fn an_unreachable_upstream_during_reverify_is_a_bad_gateway() { - let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - let auth = auth_for("http://127.0.0.1:1/", "Bearer t"); - let identity = Identity::User { username: "alice".to_string() }; - - // Port 1 refuses the connection, so neither the anonymous classify - // probe nor the authed re-verify can decide access. - let denied = authorize_upstream_package( - &acc, - &identity, - &auth, - &fresh(&[]), - "http://127.0.0.1:1/", - "foo", - "foo@1.0.0", - ) - .await; - - assert_eq!(denied.map(|response| response.status()), Some(StatusCode::BAD_GATEWAY)); -} - -#[tokio::test] -async fn a_forwarded_credential_routes_around_the_local_policy() { - // `@private/foo` is gated to `$authenticated` by the local policy, so - // an anonymous caller would be denied under pnpr-as-authority. With a - // forwarded credential it is upstream-as-authority instead, and a - // fresh fetch proves access — so it is served. 
- let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - let registry = "https://reg.test/"; - let auth = auth_for(registry, "Bearer t"); - let request = InstallRequest { registry: Some(registry.to_string()), ..Default::default() }; - - let denied = authorize_served_packages( - &acc, - &policies(), - &Identity::Anonymous, - &request, - &auth, - &fresh(&["@private/foo@1.0.0"]), - &[entry("@private/foo@1.0.0")], - ) - .await; - - assert!(denied.is_none()); -} - -#[tokio::test] -async fn without_a_forwarded_credential_the_local_policy_still_applies() { - let tmp = TempDir::new().unwrap(); - let acc = accelerator(tmp.path()); - let request = - InstallRequest { registry: Some("https://reg.test/".to_string()), ..Default::default() }; - - // No forwarded credential ⇒ pnpr-as-authority ⇒ `@private/foo` is - // denied to an anonymous caller, exactly as the packument/tarball - // endpoints would deny it. - let denied = authorize_served_packages( - &acc, - &policies(), - &Identity::Anonymous, - &request, - &AuthHeaders::default(), - &fresh(&[]), - &[entry("@private/foo@1.0.0")], - ) - .await; - - assert_eq!(denied.map(|response| response.status()), Some(StatusCode::UNAUTHORIZED)); -} diff --git a/pnpr/crates/pnpr/src/lib.rs b/pnpr/crates/pnpr/src/lib.rs index d008daf6fd..7ce1eb54d6 100644 --- a/pnpr/crates/pnpr/src/lib.rs +++ b/pnpr/crates/pnpr/src/lib.rs @@ -10,10 +10,10 @@ mod auth; mod config; mod error; -mod install_accelerator; mod package_name; mod policy; mod publish; +mod resolver; mod s3; mod search; mod server; diff --git a/pnpr/crates/pnpr/src/main.rs b/pnpr/crates/pnpr/src/main.rs index 965b05028d..7a1323149b 100644 --- a/pnpr/crates/pnpr/src/main.rs +++ b/pnpr/crates/pnpr/src/main.rs @@ -26,7 +26,7 @@ struct Args { storage: Option, /// Override the proxy-cache path — the disposable mirror of - /// upstream registries plus the install-accelerator store. Point + /// upstream registries plus the resolver's cache. 
Point /// it at separate, ephemeral disk to keep published packages and /// cached upstream content on different volumes. #[arg(long)] diff --git a/pnpr/crates/pnpr/src/resolver.rs b/pnpr/crates/pnpr/src/resolver.rs new file mode 100644 index 0000000000..519319beab --- /dev/null +++ b/pnpr/crates/pnpr/src/resolver.rs @@ -0,0 +1,469 @@ +//! pnpr resolver: server-side dependency resolution exposed as an +//! additive, opt-in protocol alongside pnpr's npm-compatible API. The +//! handshake + endpoint are served under one base URL (the `pnprServer`). +//! +//! Two routes, built on pacquet's resolver: +//! +//! * `GET /-/pnpr` — capability handshake; advertises the supported +//! protocol versions so a client can negotiate or fail fast. +//! * `POST /v1/resolve` — resolve a project **against the registries +//! the client sends** (so the server uses the same source of truth as +//! the client), verify the client's input lockfile under the client's +//! policy, and return the resolved lockfile as a gzipped JSON body. +//! The client then fetches tarballs in parallel from the registries +//! like a normal install +//! ([pnpm/pnpm#12230](https://github.com/pnpm/pnpm/issues/12230)). +//! +//! pnpr is a stateless resolver: it stores no tarballs and serves no file +//! content. The client fetches every tarball directly from the registry +//! with its own credentials, so the registry enforces access on the +//! bytes; pnpr only shapes the lockfile. +//! +//! The client's `registry`, `namedRegistries`, `overrides`, and the +//! verification policy (`minimumReleaseAge`, `trustPolicy`, ...) drive +//! resolution and verification. When the client sends its on-disk +//! lockfile, the server verifies it under the client's policy before +//! resolving, then reuses it as the resolution seed (frozen → as-is; +//! non-frozen → reuse-and-update). A multi-project workspace is resolved +//! by reconstructing the workspace on disk (root manifest + +//! 
`pnpm-workspace.yaml` + member manifests) and letting pacquet's +//! install path discover and resolve every importer. The client also +//! forwards its per-registry credentials, so private dependencies resolve +//! as the caller. + +mod protocol; +mod resolve; +mod verdict_cache; + +use std::{ + collections::HashMap, + io::Write as _, + path::PathBuf, + sync::{Arc, Mutex, OnceLock}, + time::{Duration, Instant}, +}; + +use crate::config::Config as RegistryConfig; + +use axum::{ + body::{Body, Bytes}, + http::{StatusCode, header}, + response::Response, +}; +use flate2::{Compression, write::GzEncoder}; +use indexmap::IndexMap; +use pacquet_config::Config as PacquetConfig; +use pacquet_lockfile::Lockfile; +use pacquet_lockfile_verification::{collect_resolution_policy_violations, hash_lockfile}; +use pacquet_network::{AuthHeaders, ThrottledClient}; +use pacquet_package_manager::build_resolution_verifiers; +use pacquet_resolving_npm_resolver::{InMemoryPackageMetaCache, PackageMetaCache}; +use pacquet_resolving_resolver_base::ResolutionVerifier; +use pacquet_store_dir::StoreDir; +use sha2::{Digest, Sha256}; + +use self::{protocol::ResolveRequest, verdict_cache::VerdictCache}; + +/// Per-server engine backing the pnpr install endpoint: it holds the +/// store, cache, and HTTP client used to resolve a client's project. The +/// store and cache dirs are fixed for the server's lifetime; the +/// *registries* come from each client request (the server resolves +/// against the client's registries, not its own), so the `&'static Config` +/// the install path requires is interned per distinct client registry +/// configuration rather than leaked once or per request. +/// +/// Held lazily in a [`OnceLock`] on the server's state so servers that +/// never receive such a request pay nothing, and so each server in +/// a multi-server test process keeps its own store. 
+pub(crate) struct Resolver { + store_dir: StoreDir, + cache_dir: PathBuf, + client: Arc, + resolution_cache: Mutex>, + resolution_cache_ttl: Duration, + /// One leaked `Config` per distinct client registry configuration, + /// keyed by its canonical JSON. Bounds the leak to the number of + /// distinct client setups the server sees (typically one). + configs: Mutex>, + /// SQLite-backed whole-lockfile verification verdict cache. `None` + /// only if the database couldn't be opened — verification then runs + /// every time (uncached) rather than failing the server. + verdict_cache: Option, +} + +struct CachedResolution { + lockfile: Lockfile, + inserted: Instant, +} + +impl Resolver { + pub(crate) fn get_or_init<'a>( + cell: &'a OnceLock, + config: &RegistryConfig, + ) -> &'a Resolver { + cell.get_or_init(|| Resolver::build(config)) + } + + fn build(config: &RegistryConfig) -> Resolver { + let store_dir = config.cache_storage.join("pnpr-store"); + let cache_dir = config.cache_storage.join("pnpr-cache"); + // Best-effort: a real failure here (e.g. a permission problem) + // resurfaces with a precise error on the first store/cache write + // during resolution, so there's nothing actionable to report yet. + let _ = std::fs::create_dir_all(&store_dir); + let _ = std::fs::create_dir_all(&cache_dir); + let verdict_cache = VerdictCache::open(&cache_dir.join("lockfile-verdicts.sqlite")).ok(); + Resolver { + store_dir: StoreDir::new(store_dir), + cache_dir, + client: Arc::new(ThrottledClient::new_for_installs()), + resolution_cache: Mutex::new(HashMap::new()), + resolution_cache_ttl: config.packument_ttl, + configs: Mutex::new(HashMap::new()), + verdict_cache, + } + } + + /// Resolve (or build + intern) the `&'static Config` for a request's + /// registry configuration. Pacquet's install path resolves against + /// `config.registry` / `named_registries` / `overrides`, so a request + /// from a client with a different registry setup gets its own Config. 
+ fn config_for(&self, request: &ResolveRequest) -> &'static PacquetConfig { + let registry = + request.registry.clone().unwrap_or_else(|| "https://registry.npmjs.org/".to_string()); + let registry = if registry.ends_with('/') { registry } else { format!("{registry}/") }; + let overrides: Option> = + request.overrides.as_ref().and_then(|value| serde_json::from_value(value.clone()).ok()); + + let key = serde_json::json!({ + "registry": registry, + "namedRegistries": request.named_registries, + "overrides": overrides, + "minimumReleaseAge": request.minimum_release_age, + "minimumReleaseAgeExclude": request.minimum_release_age_exclude, + "minimumReleaseAgeIgnoreMissingTime": request.minimum_release_age_ignore_missing_time, + "trustPolicy": request.trust_policy, + "trustPolicyExclude": request.trust_policy_exclude, + "trustPolicyIgnoreAfter": request.trust_policy_ignore_after, + }) + .to_string(); + + let mut configs = self.configs.lock().expect("config cache poisoned"); + if let Some(config) = configs.get(&key) { + return config; + } + + let mut config = PacquetConfig::new(); + config.store_dir = self.store_dir.clone(); + config.cache_dir = self.cache_dir.clone(); + config.registry = registry; + config.named_registries = request.named_registries.clone(); + config.overrides = overrides; + config.modules_dir = PathBuf::from("node_modules"); + config.lockfile = true; + config.verify_store_integrity = true; + // The client's verification policy drives both the input-lockfile + // verifier and the resolver's pick-time `minimumReleaseAge` / + // `trustPolicy` checks, so newly-resolved entries are held to the + // same policy as the reused ones. 
+ config.minimum_release_age = request.minimum_release_age; + config.minimum_release_age_exclude = request.minimum_release_age_exclude.clone(); + if let Some(ignore_missing_time) = request.minimum_release_age_ignore_missing_time { + config.minimum_release_age_ignore_missing_time = ignore_missing_time; + } + config.trust_policy = request.trust_policy; + config.trust_policy_exclude = request.trust_policy_exclude.clone(); + config.trust_policy_ignore_after = request.trust_policy_ignore_after; + let config: &'static PacquetConfig = config.leak(); + configs.insert(key, config); + config + } +} + +/// Handle `POST /v1/resolve`: verify the client's input lockfile under +/// the client's policy, resolve against the client's registries, and +/// return the resolved lockfile. No tarball leaves the server — the +/// client fetches them itself. +pub(crate) async fn handle_resolve(runtime: &Resolver, body: Bytes) -> Response { + let request: ResolveRequest = match serde_json::from_slice(&body) { + Ok(request) => request, + Err(err) => return json_error(StatusCode::BAD_REQUEST, &err.to_string()), + }; + + // Resolve against the client's registries, not the server's own. + let config = runtime.config_for(&request); + + // The caller's forwarded upstream credentials, threaded through + // resolve/verify but kept out of the interned `config` so it never + // leaks a `&'static Config` per user. + let request_auth = Arc::new(AuthHeaders::from_map( + request.auth_headers.iter().map(|(uri, value)| (uri.clone(), value.clone())).collect(), + )); + + // Verify the *input* lockfile under the client's policy before + // resolving ([pnpm/pnpm#12139](https://github.com/pnpm/pnpm/issues/12139)). + // The client skips its own `verifyLockfileResolutions` whenever a + // pnpr server is configured, so this is the only place the + // committed/reused entries get checked. A true first install sends + // no lockfile — nothing to verify. 
`trustLockfile` is the client's + // opt-out (mirrors the local path's `--trust-lockfile`). Freshly- + // resolved entries are held to the same policy by the resolver's + // pick-time gate (the policy is wired into `config`). + if !request.trust_lockfile + && let Some(input_lockfile) = request.lockfile.as_ref() + && let Err(failure) = + verify_input_lockfile(runtime, config, &request_auth, input_lockfile).await + { + return match failure { + VerifyFailure::Internal(response) => response, + VerifyFailure::Violations(violations) => violation_response(&violations), + }; + } + + let lockfile = if let Some(lockfile) = resolve::fresh_frozen_input_lockfile(config, &request) { + lockfile + } else { + let resolution_cache_key = if request.auth_headers.is_empty() && request.lockfile.is_none() + { + resolution_cache_key(config, &request) + } else { + None + }; + if let Some(key) = resolution_cache_key.as_ref() + && let Some(lockfile) = cached_resolution(runtime, key) + { + lockfile + } else { + let lockfile = + match resolve::resolve(config, &runtime.client, &request, &request_auth).await { + Ok(lockfile) => lockfile, + Err(err) => { + return json_error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string()); + } + }; + if let Some(key) = resolution_cache_key { + store_resolution(runtime, key, &lockfile); + } + lockfile + } + }; + + resolve_response(&lockfile) +} + +const MAX_RESOLUTION_CACHE_ENTRIES: usize = 1024; + +fn cached_resolution(runtime: &Resolver, key: &str) -> Option { + if runtime.resolution_cache_ttl.is_zero() { + return None; + } + let mut cache = runtime.resolution_cache.lock().expect("resolution cache poisoned"); + match cache.get(key) { + Some(cached) if cached.inserted.elapsed() <= runtime.resolution_cache_ttl => { + Some(cached.lockfile.clone()) + } + Some(_) => { + cache.remove(key); + None + } + None => None, + } +} + +fn store_resolution(runtime: &Resolver, key: String, lockfile: &Lockfile) { + if runtime.resolution_cache_ttl.is_zero() { + return; + } + 
let mut cache = runtime.resolution_cache.lock().expect("resolution cache poisoned"); + if cache.len() >= MAX_RESOLUTION_CACHE_ENTRIES { + let ttl = runtime.resolution_cache_ttl; + cache.retain(|_, cached| cached.inserted.elapsed() <= ttl); + } + if cache.len() >= MAX_RESOLUTION_CACHE_ENTRIES + && let Some(oldest) = + cache.iter().min_by_key(|(_, cached)| cached.inserted).map(|(key, _)| key.clone()) + { + cache.remove(&oldest); + } + cache.insert(key, CachedResolution { lockfile: lockfile.clone(), inserted: Instant::now() }); +} + +fn resolution_cache_key(config: &PacquetConfig, request: &ResolveRequest) -> Option { + let projects: Vec = request + .projects_normalized() + .into_iter() + .map(|project| { + serde_json::json!({ + "dir": project.dir, + "dependencies": project.dependencies, + "devDependencies": project.dev_dependencies, + "optionalDependencies": project.optional_dependencies, + }) + }) + .collect(); + let input = serde_json::json!({ + "registry": &config.registry, + "namedRegistries": &request.named_registries, + "overrides": &request.overrides, + "projects": projects, + "lockfile": &request.lockfile, + "frozenLockfile": request.frozen_lockfile, + "preferFrozenLockfile": request.prefer_frozen_lockfile, + "ignoreManifestCheck": request.ignore_manifest_check, + "trustLockfile": request.trust_lockfile, + "minimumReleaseAge": request.minimum_release_age, + "minimumReleaseAgeExclude": &request.minimum_release_age_exclude, + "minimumReleaseAgeIgnoreMissingTime": request.minimum_release_age_ignore_missing_time, + "trustPolicy": request.trust_policy, + "trustPolicyExclude": &request.trust_policy_exclude, + "trustPolicyIgnoreAfter": request.trust_policy_ignore_after, + }); + let bytes = serde_json::to_vec(&input).ok()?; + let mut hasher = Sha256::new(); + hasher.update(bytes); + Some(format!("{:x}", hasher.finalize())) +} + +/// gzip level for the response body. 
Level 6 (the gzip default) shrinks +/// the JSON lockfile ~16% over level 1 — the win that matters once the +/// server is across a latency link, where fewer bytes means fewer TCP +/// slow-start round trips — while level 9 adds under a percent for several +/// times the CPU. +const GZIP_LEVEL: u32 = 6; + +/// Build the install response: the resolved lockfile and stats as a +/// gzipped JSON object. The client writes the lockfile, then fetches +/// every tarball itself. +fn resolve_response(lockfile: &Lockfile) -> Response { + let total_packages = lockfile.packages.as_ref().map_or(0, |packages| packages.len()); + let header = serde_json::json!({ + "lockfile": serde_json::to_value(lockfile).unwrap_or(serde_json::Value::Null), + "stats": { "totalPackages": total_packages }, + }); + json_gzip_response(&header) +} + +/// Render input-lockfile policy violations into the response body +/// (`{ "violations": [...] }`) so the client rebuilds the identical +/// `VerifyError` and aborts the same way the local gate would. +fn violation_response(violations: &[serde_json::Value]) -> Response { + json_gzip_response(&serde_json::json!({ "violations": violations })) +} + +/// Serialize `value` to JSON and gzip it into a `200` response body. 
+fn json_gzip_response(value: &serde_json::Value) -> Response { + let body = serde_json::to_vec(value).unwrap_or_else(|_| b"{}".to_vec()); + let mut encoder = GzEncoder::new(Vec::new(), Compression::new(GZIP_LEVEL)); + if encoder.write_all(&body).is_err() { + return json_error(StatusCode::INTERNAL_SERVER_ERROR, "gzip failed"); + } + let gzipped = match encoder.finish() { + Ok(gzipped) => gzipped, + Err(_) => return json_error(StatusCode::INTERNAL_SERVER_ERROR, "gzip failed"), + }; + + Response::builder() + .status(StatusCode::OK) + .header(header::CONTENT_TYPE, "application/json") + .header(header::CONTENT_ENCODING, "gzip") + .body(Body::from(gzipped)) + .expect("binary response is always valid") +} + +/// Why [`verify_input_lockfile`] failed: either the lockfile violated +/// the client's policy (carry the rendered violations so the caller can +/// shape them for the client's protocol) or the verifiers couldn't be +/// built at all (a ready-made error response). +enum VerifyFailure { + Violations(Vec), + Internal(Response), +} + +/// Verify the client's input lockfile under the client's policy. On a +/// clean pass returns `Ok(())`; on a policy violation returns the +/// rendered violations so the caller can deliver them to the client. A +/// build-verifiers failure (e.g. an invalid exclude pattern) returns a +/// ready-made 500. +async fn verify_input_lockfile( + runtime: &Resolver, + config: &'static PacquetConfig, + auth_headers: &Arc, + lockfile: &Lockfile, +) -> Result<(), VerifyFailure> { + // A fresh per-request packument cache shared with the verifier; the + // on-disk metadata mirror under `/v11/metadata-full` is + // warm across requests and is the real verification cache. 
+ let meta_cache = Arc::new(InMemoryPackageMetaCache::default()); + let verifiers = build_resolution_verifiers( + config, + Arc::clone(&runtime.client), + Some(meta_cache as Arc), + Some(Arc::clone(auth_headers)), + ) + .map_err(|err| { + VerifyFailure::Internal(json_error(StatusCode::INTERNAL_SERVER_ERROR, &err.to_string())) + })?; + + // Whole-lockfile verdict cache: an O(1) hit when this exact lockfile + // already passed under a policy we still trust skips the whole fan-out + // (the dominant win for a shared pnpr — CI re-runs, a fleet building + // the same repo). + let hash = hash_lockfile(lockfile); + if let Some(cache) = runtime.verdict_cache.as_ref() + && cache.is_verified(&hash, |policy| { + verifiers.iter().all(|verifier| verifier.can_trust_past_check(policy)) + }) + { + return Ok(()); + } + + let violations = collect_resolution_policy_violations(lockfile, &verifiers, None).await; + if violations.is_empty() { + if let Some(cache) = runtime.verdict_cache.as_ref() { + cache.record(&hash, &merge_policies(&verifiers)); + } + return Ok(()); + } + + let rendered: Vec = violations + .iter() + .map(|violation| { + serde_json::json!({ + "name": violation.name.to_string(), + "version": violation.version, + "code": violation.code, + "reason": violation.reason, + }) + }) + .collect(); + Err(VerifyFailure::Violations(rendered)) +} + +/// Merge every active verifier's policy snapshot into one bag, the key +/// the verdict cache stores alongside the lockfile hash. Later verifiers +/// overwrite earlier ones on a shared key — mirrors the local cache's +/// `merge_policies` so a verdict recorded here is comparable to one the +/// client's own cache would write. 
+fn merge_policies( + verifiers: &[Arc], +) -> serde_json::Map { + let mut merged = serde_json::Map::new(); + for verifier in verifiers { + for (key, value) in verifier.policy() { + merged.insert(key.clone(), value.clone()); + } + } + merged +} + +fn json_error(status: StatusCode, message: &str) -> Response { + let body = serde_json::json!({ "error": message }).to_string(); + Response::builder() + .status(status) + .header(header::CONTENT_TYPE, "application/json") + .body(Body::from(body)) + .expect("static json error response is always valid") +} + +#[cfg(test)] +mod tests; diff --git a/pnpr/crates/pnpr/src/install_accelerator/protocol.rs b/pnpr/crates/pnpr/src/resolver/protocol.rs similarity index 88% rename from pnpr/crates/pnpr/src/install_accelerator/protocol.rs rename to pnpr/crates/pnpr/src/resolver/protocol.rs index 573541544a..c9f0b2eeb2 100644 --- a/pnpr/crates/pnpr/src/install_accelerator/protocol.rs +++ b/pnpr/crates/pnpr/src/resolver/protocol.rs @@ -1,4 +1,4 @@ -//! Wire types for the pnpr install-accelerator endpoints, matching the +//! Wire types for the pnpr resolver endpoints, matching the //! `@pnpm/pnpr.client` TypeScript client's request shapes. use std::collections::BTreeMap; @@ -9,7 +9,7 @@ pub type DepMap = BTreeMap; #[derive(Debug, Default, Deserialize)] #[serde(rename_all = "camelCase")] -pub struct InstallRequestProject { +pub struct ResolveRequestProject { /// The importer's directory relative to the lockfile dir, in POSIX /// form (`.` for the root, `packages/foo` for a workspace member). #[serde(default = "root_dir")] @@ -26,7 +26,7 @@ fn root_dir() -> String { ".".to_string() } -/// Body of `POST /v1/install`. The registry fields carry the *client's* +/// Body of `POST /v1/resolve`. 
The registry fields carry the *client's* /// resolution configuration so the server resolves against the same /// registries the client would, and the policy fields carry the /// client's verification policy so the server verifies the input @@ -35,7 +35,7 @@ fn root_dir() -> String { /// parse. #[derive(Debug, Default, Deserialize)] #[serde(rename_all = "camelCase")] -pub struct InstallRequest { +pub struct ResolveRequest { #[serde(default)] pub dependencies: Option, #[serde(default)] @@ -43,9 +43,7 @@ pub struct InstallRequest { #[serde(default)] pub optional_dependencies: Option, #[serde(default)] - pub projects: Option>, - #[serde(default)] - pub store_integrities: Vec, + pub projects: Option>, /// The client's default registry. Falls back to npmjs when absent. #[serde(default)] pub registry: Option, @@ -85,14 +83,6 @@ pub struct InstallRequest { /// comparison during the frozen resolve. #[serde(default)] pub ignore_manifest_check: bool, - /// `lockfileOnly`: resolve and return only the lockfile — skip the - /// tarball fetch and the file-level diff entirely. Mirrors pnpm's - /// `--lockfile-only` (resolve + write lockfile, fetch nothing, link - /// nothing); the response carries just the `L` line, no `D`/`I` - /// lines. See - /// [pnpm/pnpm#12146](https://github.com/pnpm/pnpm/issues/12146). - #[serde(default)] - pub lockfile_only: bool, /// The client's effective `trustLockfile`. When `true` the client /// opted out of lockfile verification, so the server skips the /// input-lockfile verify gate (it still reuses the lockfile for @@ -133,7 +123,7 @@ pub struct ProjectDeps { pub optional_dependencies: DepMap, } -impl InstallRequest { +impl ResolveRequest { /// Every project to resolve, keyed by importer dir. 
The legacy /// single-project body (top-level `dependencies`/`devDependencies`) /// maps to a single root (`.`) importer; an empty/absent `projects` diff --git a/pnpr/crates/pnpr/src/install_accelerator/resolve.rs b/pnpr/crates/pnpr/src/resolver/resolve.rs similarity index 60% rename from pnpr/crates/pnpr/src/install_accelerator/resolve.rs rename to pnpr/crates/pnpr/src/resolver/resolve.rs index 8c7eb09606..46d49efb54 100644 --- a/pnpr/crates/pnpr/src/install_accelerator/resolve.rs +++ b/pnpr/crates/pnpr/src/resolver/resolve.rs @@ -1,10 +1,10 @@ //! Server-side dependency resolution backed by pacquet. //! //! Writes a throwaway project, resolves it lockfile-only (so -//! `node_modules` is never linked), reads the produced lockfile back, -//! then fetches into the shared store only the packages that aren't -//! cached yet. The store index that results is the source of truth the -//! [`super::diff`] pass reads. +//! `node_modules` is never linked and no tarball is fetched), then reads +//! the produced lockfile back. pnpr serves no files, so the store is +//! never populated with package contents — the client fetches every +//! tarball itself. 
use std::{ collections::HashSet, @@ -13,24 +13,14 @@ use std::{ use dashmap::DashMap; use pacquet_config::{Config, NodeLinker}; -use pacquet_lockfile::{Lockfile, LockfileResolution}; +use pacquet_lockfile::{Lockfile, check_lockfile_settings, satisfies_package_manifest}; use pacquet_network::{AuthHeaders, ThrottledClient}; use pacquet_package_manager::{Install, ResolvedPackages}; use pacquet_package_manifest::{DependencyGroup, PackageManifest}; use pacquet_reporter::SilentReporter; -use pacquet_store_dir::{SharedVerifiedFilesCache, StoreIndex, StoreIndexWriter, store_index_key}; -use pacquet_tarball::{DownloadTarballToStore, MemCache, RetryOpts}; +use pacquet_tarball::MemCache; -use super::protocol::InstallRequest; - -/// A resolved package distilled from the lockfile, carrying everything -/// needed both to fetch it (`tarball_url`) and to diff it (`integrity`, -/// `pkg_id`). -pub struct ResolvedPkg { - pub pkg_id: String, - pub integrity: String, - pub tarball_url: String, -} +use super::protocol::ResolveRequest; #[derive(Debug)] pub enum ResolveError { @@ -58,8 +48,8 @@ impl From for ResolveError { } /// Resolve a request lockfile-only and return the produced lockfile. -/// The store is intentionally left untouched here (no tarball is -/// fetched); [`fetch_uncached`] populates it afterward. +/// The store is intentionally left untouched (no tarball is fetched): +/// pnpr serves no file content, so the client fetches every tarball. /// /// A single-project request resolves one root (`.`) importer. 
A /// multi-project request is reconstructed as a real workspace in the @@ -70,7 +60,7 @@ impl From for ResolveError { pub async fn resolve( config: &'static Config, client: &Arc, - request: &InstallRequest, + request: &ResolveRequest, auth_headers: &Arc, ) -> Result { let projects = request.projects_normalized(); @@ -183,7 +173,7 @@ pub async fn resolve( ignore_manifest_check: request.ignore_manifest_check, skip_runtimes: false, // The lockfile was already verified under the client's policy - // (in `handle_install`) before we get here, so the install path + // (in `handle_resolve`) before we get here, so the install path // must not re-verify it. trust_lockfile: true, update_checksums: false, @@ -207,139 +197,69 @@ pub async fn resolve( Ok(lockfile) } -/// Extract every registry/tarball package from the lockfile, deriving -/// the tarball URL the same way pacquet's install path does (registry -/// resolutions never store the URL in a v9 lockfile). -pub fn collect_packages(lockfile: &Lockfile, registry: &str) -> Vec { - let Some(packages) = lockfile.packages.as_ref() else { return Vec::new() }; - let mut out = Vec::with_capacity(packages.len()); - for (key, metadata) in packages { - let dep_path = key.to_string(); - let pkg_id = dep_path.split('(').next().unwrap_or(&dep_path).to_string(); - let Some((integrity, tarball_url)) = fetch_info(&metadata.resolution, &pkg_id, registry) - else { - continue; - }; - out.push(ResolvedPkg { pkg_id, integrity, tarball_url }); - } - out -} - -/// Fetch into the shared store every package whose store-index row is -/// absent, populating its `PackageFilesIndex` as a side effect. Cached -/// packages are skipped, matching the server hot-cache no-op. -/// -/// Returns the `pkg_id`s actually fetched this call — the upstream -/// accepted the caller's credentials for each, so the gate treats a -/// freshly-fetched private package as proven (no re-verify). 
-pub async fn fetch_uncached( - config: &'static Config, - client: &Arc, - auth_headers: &AuthHeaders, - packages: &[ResolvedPkg], -) -> Result, ResolveError> { - let store_dir = &config.store_dir; - - let present: HashSet = match StoreIndex::open_readonly_in(store_dir) { - Ok(index) => index.keys().unwrap_or_default().into_iter().collect(), - Err(_) => HashSet::new(), - }; - - let to_fetch: Vec<&ResolvedPkg> = packages - .iter() - .filter(|pkg| !present.contains(&store_index_key(&pkg.integrity, &pkg.pkg_id))) - .filter(|pkg| !pkg.tarball_url.is_empty()) - .collect(); - - if to_fetch.is_empty() { - return Ok(HashSet::new()); - } - - let fetched_ids: HashSet = to_fetch.iter().map(|pkg| pkg.pkg_id.clone()).collect(); - - let integrities: Vec> = - to_fetch.iter().map(|pkg| pkg.integrity.parse::().ok()).collect(); - - let shared_index = StoreIndex::shared_readonly_in(store_dir); - let (writer, writer_task) = StoreIndexWriter::spawn(store_dir); - let verified = SharedVerifiedFilesCache::default(); - - let downloads = to_fetch.iter().zip(integrities.iter()).filter_map(|(pkg, integrity)| { - let integrity = integrity.as_ref()?; - let store_index = shared_index.clone(); - let writer = Arc::clone(&writer); - let verified = SharedVerifiedFilesCache::clone(&verified); - Some(async move { - DownloadTarballToStore { - http_client: client, - store_dir, - store_index, - store_index_writer: Some(writer), - verify_store_integrity: config.verify_store_integrity, - verified_files_cache: verified, - package_integrity: integrity, - package_unpacked_size: None, - package_url: &pkg.tarball_url, - package_id: &pkg.pkg_id, - auth_headers, - requester: "pnpr", - prefetched_cas_paths: None, - retry_opts: RetryOpts::default(), - ignore_file_pattern: None, - offline: false, - } - .run_without_mem_cache::() - .await - .map_err(|err| ResolveError::Install(err.to_string())) - }) - }); - - let results = futures_util::future::join_all(downloads).await; - - drop(writer); - let _ = writer_task.await; 
- - for result in results { - result?; - } - Ok(fetched_ids) -} - -/// Derive `(integrity, tarball_url)` for a resolution, mirroring -/// pacquet's `tarball_url_and_integrity`. Returns `None` for git, -/// directory, binary, and variations resolutions (not served by the -/// pnpr install accelerator). -fn fetch_info( - resolution: &LockfileResolution, - pkg_id: &str, - registry: &str, -) -> Option<(String, String)> { - match resolution { - LockfileResolution::Tarball(tarball) => { - let integrity = tarball.integrity.as_ref()?; - Some((integrity.to_string(), tarball.tarball.clone())) - } - LockfileResolution::Registry(registry_resolution) => { - let (name, version) = split_name_version(pkg_id)?; - let bare = name.rsplit('/').next().unwrap_or(name); - let registry = registry.strip_suffix('/').unwrap_or(registry); - Some(( - registry_resolution.integrity.to_string(), - format!("{registry}/{name}/-/{bare}-{version}.tgz"), - )) - } - _ => None, - } -} - -/// Split `name@version` into its parts, tolerating a leading scope -/// `@` (`@scope/name@1.2.3` → `("@scope/name", "1.2.3")`). -fn split_name_version(pkg_id: &str) -> Option<(&str, &str)> { - let at = pkg_id.rfind('@')?; - if at == 0 { +/// Return the caller's frozen input lockfile when pacquet's freshness +/// checks prove the server's lockfile-only resolve would return it +/// unchanged. 
+pub fn fresh_frozen_input_lockfile(config: &Config, request: &ResolveRequest) -> Option { + if !request.frozen_lockfile || request.prefer_frozen_lockfile == Some(false) { return None; } - Some((&pkg_id[..at], &pkg_id[at + 1..])) + if request.overrides.as_ref().is_some_and(|value| match value { + serde_json::Value::Object(map) => !map.is_empty(), + serde_json::Value::Null => false, + _ => true, + }) { + return None; + } + if config.package_extensions.as_ref().is_some_and(|extensions| !extensions.is_empty()) + || config + .ignored_optional_dependencies + .as_ref() + .is_some_and(|patterns| !patterns.is_empty()) + || config.inject_workspace_packages + { + return None; + } + + let lockfile = request.lockfile.as_ref()?; + check_lockfile_settings( + lockfile, + None, + None, + None, + config.inject_workspace_packages, + config.peers_suffix_max_length, + ) + .ok()?; + + if request.ignore_manifest_check { + return Some(lockfile.clone()); + } + + let mut projects = request.projects_normalized(); + if projects.len() != 1 { + return None; + } + let project = projects.pop()?; + if project.dir != "." 
&& !project.dir.is_empty() { + return None; + } + let importer = lockfile.importers.get(Lockfile::ROOT_IMPORTER_KEY)?; + let temp = tempfile::Builder::new().prefix("pnpr-frozen-").tempdir().ok()?; + let manifest_path = temp.path().join("package.json"); + let manifest_json = serde_json::json!({ + "name": "pnpr-resolve", + "version": "0.0.0", + "dependencies": project.dependencies, + "devDependencies": project.dev_dependencies, + "optionalDependencies": project.optional_dependencies, + }); + std::fs::write(&manifest_path, serde_json::to_vec(&manifest_json).ok()?).ok()?; + let manifest = PackageManifest::from_path(manifest_path).ok()?; + satisfies_package_manifest(importer, &manifest, Lockfile::ROOT_IMPORTER_KEY, &|_: &str| false) + .ok()?; + + Some(lockfile.clone()) } /// Validate a client-supplied importer dir before joining it onto the diff --git a/pnpr/crates/pnpr/src/install_accelerator/resolve/tests.rs b/pnpr/crates/pnpr/src/resolver/resolve/tests.rs similarity index 100% rename from pnpr/crates/pnpr/src/install_accelerator/resolve/tests.rs rename to pnpr/crates/pnpr/src/resolver/resolve/tests.rs diff --git a/pnpr/crates/pnpr/src/resolver/tests.rs b/pnpr/crates/pnpr/src/resolver/tests.rs new file mode 100644 index 0000000000..5d26d09d59 --- /dev/null +++ b/pnpr/crates/pnpr/src/resolver/tests.rs @@ -0,0 +1,62 @@ +use std::collections::BTreeMap; + +use pacquet_config::Config as PacquetConfig; + +use super::{ + protocol::{ResolveRequest, ResolveRequestProject}, + resolution_cache_key, +}; + +fn config() -> PacquetConfig { + let mut config = PacquetConfig::new(); + config.registry = "https://registry.example.test/".to_string(); + config +} + +fn deps(entries: &[(&str, &str)]) -> BTreeMap { + entries.iter().map(|(name, spec)| ((*name).to_string(), (*spec).to_string())).collect() +} + +#[test] +fn resolution_cache_key_normalizes_single_project_requests() { + let top_level = ResolveRequest { + dependencies: Some(deps(&[("foo", "^1.0.0")])), + 
..ResolveRequest::default() + }; + let projects = ResolveRequest { + projects: Some(vec![ResolveRequestProject { + dir: ".".to_string(), + dependencies: deps(&[("foo", "^1.0.0")]), + ..ResolveRequestProject::default() + }]), + ..ResolveRequest::default() + }; + + assert_eq!( + resolution_cache_key(&config(), &top_level), + resolution_cache_key(&config(), &projects), + ); +} + +#[test] +fn resolution_cache_key_changes_with_dependencies_and_policy() { + let base = ResolveRequest { + dependencies: Some(deps(&[("foo", "^1.0.0")])), + ..ResolveRequest::default() + }; + let different_dep = ResolveRequest { + dependencies: Some(deps(&[("foo", "^2.0.0")])), + ..ResolveRequest::default() + }; + let different_policy = ResolveRequest { + dependencies: Some(deps(&[("foo", "^1.0.0")])), + minimum_release_age: Some(60), + ..ResolveRequest::default() + }; + + let config = config(); + let base_key = resolution_cache_key(&config, &base); + + assert_ne!(base_key, resolution_cache_key(&config, &different_dep)); + assert_ne!(base_key, resolution_cache_key(&config, &different_policy)); +} diff --git a/pnpr/crates/pnpr/src/install_accelerator/verdict_cache.rs b/pnpr/crates/pnpr/src/resolver/verdict_cache.rs similarity index 98% rename from pnpr/crates/pnpr/src/install_accelerator/verdict_cache.rs rename to pnpr/crates/pnpr/src/resolver/verdict_cache.rs index f4294e006e..dd4dabd961 100644 --- a/pnpr/crates/pnpr/src/install_accelerator/verdict_cache.rs +++ b/pnpr/crates/pnpr/src/resolver/verdict_cache.rs @@ -1,5 +1,5 @@ //! SQLite-backed whole-lockfile verification verdict cache for the pnpr -//! install accelerator ([pnpm/pnpm#12139](https://github.com/pnpm/pnpm/issues/12139)). +//! resolver ([pnpm/pnpm#12139](https://github.com/pnpm/pnpm/issues/12139)). //! //! Caches the *result* of verifying an entire input lockfile. Like the //! 
local `lockfile-verified.jsonl` cache diff --git a/pnpr/crates/pnpr/src/install_accelerator/verdict_cache/tests.rs b/pnpr/crates/pnpr/src/resolver/verdict_cache/tests.rs similarity index 100% rename from pnpr/crates/pnpr/src/install_accelerator/verdict_cache/tests.rs rename to pnpr/crates/pnpr/src/resolver/verdict_cache/tests.rs diff --git a/pnpr/crates/pnpr/src/s3.rs b/pnpr/crates/pnpr/src/s3.rs index 1e8b64433e..48ec810c7e 100644 --- a/pnpr/crates/pnpr/src/s3.rs +++ b/pnpr/crates/pnpr/src/s3.rs @@ -10,7 +10,7 @@ //! Any S3-compatible endpoint works: AWS S3 (omit `endpoint`), //! Cloudflare R2 (`region: auto`, the account endpoint), MinIO, //! Backblaze B2, Wasabi, etc. The disposable proxy cache and the -//! install-accelerator SQLite stores stay on local disk regardless — +//! resolver SQLite stores stay on local disk regardless — //! only the hosted store is pluggable. use crate::{error::Result, package_name::PackageName}; diff --git a/pnpr/crates/pnpr/src/server.rs b/pnpr/crates/pnpr/src/server.rs index 604749a44c..9f25a7b0ec 100644 --- a/pnpr/crates/pnpr/src/server.rs +++ b/pnpr/crates/pnpr/src/server.rs @@ -68,9 +68,9 @@ struct AppInner { /// two concurrent writers to the same package on this instance can't /// lose each other's changes. See [`PackageLocks`]. package_locks: PackageLocks, - /// Lazily-built engine backing the `/v1/install` endpoint. Built on + /// Lazily-built engine backing the `/v1/resolve` endpoint. Built on /// first such request so servers that never receive one pay nothing. 
- install_accelerator: std::sync::OnceLock, + resolver: std::sync::OnceLock, } /// Per-package serialization for the read-modify-write packument flows @@ -151,16 +151,16 @@ pub fn router_with_auth(config: Config, auth: AuthState) -> Router { config, auth, package_locks: PackageLocks::new(), - install_accelerator: std::sync::OnceLock::new(), + resolver: std::sync::OnceLock::new(), }), }; Router::new() .route("/-/ping", get(serve_ping)) - // pnpr install accelerator: opt-in, versioned endpoints layered on the + // pnpr resolver: opt-in, versioned endpoints layered on the // registry core. Non-pnpm clients never touch these. `/-/pnpr` // is the capability handshake (404 on a plain registry). .route("/-/pnpr", get(serve_pnpr_handshake)) - .route("/v1/install", post(serve_install)) + .route("/v1/resolve", post(serve_resolve)) .route("/{name}", get(get_packument_unscoped).put(put_one_segment)) .route("/{first}/{second}", get(get_two_segments).put(put_two_segments)) .route( @@ -180,19 +180,15 @@ pub fn router_with_auth(config: Config, auth: AuthState) -> Router { // gzip`, matching how a real (CDN-fronted) registry serves // packuments — pnpr is commonly hit directly with no proxy in // front, so the application is the only layer that can compress. - // Scoped to JSON: the binary endpoints are excluded so we never - // re-gzip an already-compressed payload — tarballs - // (`application/octet-stream`, already `.tgz`) and the install - // accelerator response (`application/x-pnpr-install-inline`, - // already gzipped). Already-`Content-Encoding` responses are - // skipped by the layer regardless. - .layer( - CompressionLayer::new().compress_when( - DefaultPredicate::new() - .and(NotForContentType::const_new("application/octet-stream")) - .and(NotForContentType::const_new("application/x-pnpr-install-inline")), - ), - ) + // Scoped to JSON: tarballs (`application/octet-stream`, already + // `.tgz`) are excluded so we never re-gzip an already-compressed + // payload. 
The resolver response is `application/json` but is + // gzipped at the handler and carries `Content-Encoding: gzip`, so + // the layer skips it regardless (already-`Content-Encoding` + // responses are never re-compressed). + .layer(CompressionLayer::new().compress_when( + DefaultPredicate::new().and(NotForContentType::const_new("application/octet-stream")), + )) // One structured access record per HTTP request: a span // carrying method + URI plus a single `finished processing // request` event on the response with status and latency. @@ -1665,32 +1661,20 @@ async fn serve_ping(State(_state): State) -> Response { (StatusCode::OK, axum::Json(serde_json::json!({}))).into_response() } -/// `GET /-/pnpr` — capability handshake for the pnpr install-accelerator +/// `GET /-/pnpr` — capability handshake for the pnpr resolver /// protocol. A plain npm registry has no such route and 404s, so a /// client can fail fast against a misconfigured server. `versions` -/// lists the `/vN/install` protocol versions this server speaks. +/// lists the `/vN/resolve` protocol versions this server speaks. 
async fn serve_pnpr_handshake() -> Response { (StatusCode::OK, axum::Json(serde_json::json!({ "pnpr": { "versions": [1] } }))).into_response() } -async fn serve_install( - State(state): State, - headers: HeaderMap, - body: axum::body::Bytes, -) -> Response { - let runtime = crate::install_accelerator::InstallAccelerator::get_or_init( - &state.inner.install_accelerator, - &state.inner.config, - ); - let identity = match resolve_identity(&state, &headers).await { - Ok(identity) => identity, - Err(err) => return error_response(&err), - }; - crate::install_accelerator::handle_install( - runtime, - &state.inner.config.policies, - identity, - body, - ) - .await +async fn serve_resolve(State(state): State, body: axum::body::Bytes) -> Response { + // pnpr resolves but serves no file content, so there is no per-package + // read gate here: the client fetches every tarball directly from the + // registry with its own credentials, and resolution uses the client's + // forwarded credentials for private packages. + let runtime = + crate::resolver::Resolver::get_or_init(&state.inner.resolver, &state.inner.config); + crate::resolver::handle_resolve(runtime, body).await } diff --git a/pnpr/npm/pnpr/README.md b/pnpr/npm/pnpr/README.md index 0fe83e484f..559afb499e 100644 --- a/pnpr/npm/pnpr/README.md +++ b/pnpr/npm/pnpr/README.md @@ -41,7 +41,7 @@ pnpm config set registry http://127.0.0.1:4873/ | `-c, --config ` | Path to a verdaccio-shaped YAML config. When omitted, the bundled default is used. | | `--listen ` | Address to bind to. Defaults to `127.0.0.1:4873`. | | `--storage ` | Override the storage directory from the loaded config. | -| `--cache ` | Override the disposable proxy-cache directory (the mirror of upstream registries plus the install-accelerator store). Defaults to a `.pnpr-cache` subdirectory of `--storage`. | +| `--cache ` | Override the disposable proxy-cache directory (the mirror of upstream registries plus the resolver cache). 
Defaults to a `.pnpr-cache` subdirectory of `--storage`. | | `--public-url ` | URL clients should use to reach the server, used when rewriting `dist.tarball` in served packuments. Defaults to `http://`. | | `--packument-ttl-secs ` | Seconds before a cached packument is considered stale and refetched. | @@ -85,13 +85,13 @@ pnpr -c ./pnpr.yaml - **Hosted** — the source of truth: packages published to this server plus anything served in static mode. This lives under `storage`. - **Cache** — the disposable mirror of upstream registries plus the - install-accelerator store. This lives under `cache` (defaults to + resolver cache. This lives under `cache` (defaults to `/.pnpr-cache`). By default both are local directories. Adding an `s3:` block moves the **hosted** store into an S3-compatible object store, so the durable data is replicated by the provider and can be shared by several stateless -`pnpr` replicas. The cache and the install-accelerator databases always +`pnpr` replicas. The cache and the resolver databases always stay on local disk — only the hosted store is pluggable. 
Because any S3-compatible endpoint works, this also covers **Cloudflare diff --git a/worker/src/index.ts b/worker/src/index.ts index bf5910fa91..14638f1daf 100644 --- a/worker/src/index.ts +++ b/worker/src/index.ts @@ -18,7 +18,6 @@ import type { LinkPkgMessage, SymlinkAllModulesMessage, TarballExtractMessage, - WriteCafsFilesMessage, } from './types.js' let workerPool: WorkerPool | undefined @@ -201,31 +200,6 @@ export async function addFilesFromTarball (opts: AddFilesFromTarballOptions): Pr } -export async function writeCafsFiles (opts: { - storeDir: string - payload: Uint8Array -}): Promise { - if (!workerPool) { - workerPool = createTarballWorkerPool() - } - const localWorker = await workerPool.checkoutWorkerAsync(true) - return new Promise((resolve, reject) => { - localWorker.once('message', ({ status, error, filesWritten }) => { - workerPool!.checkinWorker(localWorker) - if (status === 'error') { - reject(new PnpmError('CAFS_WRITE', error.message)) - return - } - resolve(filesWritten) - }) - localWorker.postMessage({ - type: 'write-cafs-files', - storeDir: opts.storeDir, - payload: opts.payload, - } satisfies WriteCafsFilesMessage) - }) -} - export interface ReadPkgFromCafsContext { storeDir: string verifyStoreIntegrity: boolean @@ -279,33 +253,7 @@ export async function readPkgFromCafs ( // so, running them in parallel helps only to a point. // With local experimenting it was discovered that running 4 workers gives the best results. // Adding more workers actually makes installation slower. -let limitImportingPackage = pLimit(4) - -/** - * Temporarily change import concurrency. Called by the pnpr server code path - * where there's no concurrent fetching competing for workers. Returns a - * disposer that restores the previous limiter — callers must invoke it (in a - * finally block) to avoid leaking the mutation to other installs in the same - * process (e.g. test suites). 
- * - * If two installs overlap, the disposer for the outer install would otherwise - * clobber the inner one's still-active limiter. Each disposer captures the - * limiter it installed and only restores when it's still the active one, - * leaving any newer override in place. - */ -export function setImportConcurrency (concurrency: number): () => void { - if (!Number.isInteger(concurrency) || concurrency < 1) { - throw new Error(`setImportConcurrency: expected a positive integer, got ${concurrency}`) - } - const previous = limitImportingPackage - const installed = pLimit(concurrency) - limitImportingPackage = installed - return () => { - if (limitImportingPackage === installed) { - limitImportingPackage = previous - } - } -} +const limitImportingPackage = pLimit(4) export async function importPackage ( opts: Omit diff --git a/worker/src/start.ts b/worker/src/start.ts index be1e58459f..670b9e44f0 100644 --- a/worker/src/start.ts +++ b/worker/src/start.ts @@ -34,7 +34,6 @@ import type { ReadPkgFromCafsMessage, SymlinkAllModulesMessage, TarballExtractMessage, - WriteCafsFilesMessage, } from './types.js' export function startWorker (): void { @@ -65,7 +64,6 @@ async function handleMessage ( | SymlinkAllModulesMessage | HardLinkDirMessage | InitStoreMessage - | WriteCafsFilesMessage | false ): Promise { if (message === false) { @@ -169,10 +167,6 @@ async function handleMessage ( parentPort!.postMessage({ status: 'success' }) break } - case 'write-cafs-files': { - parentPort!.postMessage(await writeCafsFiles(message)) - break - } } } catch (e: any) { // eslint-disable-line parentPort!.postMessage({ @@ -501,73 +495,3 @@ function symlinkAllModules (opts: SymlinkAllModulesMessage): { status: 'success' return { status: 'success' } } -async function writeCafsFiles (message: WriteCafsFilesMessage): Promise<{ status: string, filesWritten: number }> { - const { contentPathFromHex } = await import('@pnpm/store.cafs') - - // `message.payload` is the already-decompressed file 
portion of a - // `/v1/install` response: a length-prefixed JSON header, then one - // `[64-byte digest][u32 size][1-byte exec][content]` frame per file, - // terminated by 64 zero bytes. - const payload = Buffer.from(message.payload.buffer, message.payload.byteOffset, message.payload.byteLength) - const END_MARKER = Buffer.alloc(64, 0) - const createdDirs = new Set() - - if (payload.length < 4) { - throw new Error('pnpr server /v1/install file payload is truncated') - } - // Skip the length-prefixed JSON header that precedes the frames. - const jsonLen = payload.readUInt32BE(0) - let offset = 4 + jsonLen - let filesWritten = 0 - let endMarkerSeen = false - - while (offset + 64 <= payload.length) { - if (payload.subarray(offset, offset + 64).equals(END_MARKER)) { - endMarkerSeen = true - offset += 64 - break - } - if (offset + 69 > payload.length) break // 64 digest + 4 size + 1 mode - const size = payload.readUInt32BE(offset + 64) - const entryLen = 69 + size - if (offset + entryLen > payload.length) break // incomplete entry - - const digest = payload.subarray(offset, offset + 64).toString('hex') - const executable = (payload[offset + 68] & 0x01) !== 0 - const content = payload.subarray(offset + 69, offset + entryLen) - - const relPath = contentPathFromHex(executable ? 'exec' : 'nonexec', digest) - const fullPath = path.join(message.storeDir, relPath) - const dir = path.dirname(fullPath) - if (!createdDirs.has(dir)) { - fs.mkdirSync(dir, { recursive: true }) - createdDirs.add(dir) - } - try { - fs.writeFileSync(fullPath, content, { flag: 'wx', mode: executable ? 0o755 : 0o644 }) - } catch (err: unknown) { - if (!(err instanceof Error && 'code' in err && (err as NodeJS.ErrnoException).code === 'EEXIST')) { - throw err - } - // EEXIST means the same digest is already at this CAFS path. CAFS is - // content-addressed, so a complete file is by definition correct. 
But a - // previous process could have crashed mid-write and left a truncated - // file — the pnpr path skips integrity verification, so we'd silently - // install garbage. Detect truncation by size and overwrite atomically. - const onDiskSize = fs.statSync(fullPath).size - if (onDiskSize !== content.length) { - const tmpPath = `${fullPath}.tmp-${process.pid}-${Date.now()}` - fs.writeFileSync(tmpPath, content, { mode: executable ? 0o755 : 0o644 }) - fs.renameSync(tmpPath, fullPath) - } - } - filesWritten++ - offset += entryLen - } - - if (!endMarkerSeen) { - throw new Error('pnpr server /v1/install file payload ended without the end marker') - } - return { status: 'success', filesWritten } -} - diff --git a/worker/src/types.ts b/worker/src/types.ts index a3a8fd9f51..4251685daa 100644 --- a/worker/src/types.ts +++ b/worker/src/types.ts @@ -79,15 +79,3 @@ export interface HardLinkDirMessage { src: string destDirs: string[] } - -export interface WriteCafsFilesMessage { - type: 'write-cafs-files' - storeDir: string - /** - * The binary file frames from a `/v1/install` response, already - * decompressed: a length-prefixed JSON header followed by one - * `[64-byte digest][u32 size][1-byte exec][content]` frame per file, - * terminated by 64 zero bytes. - */ - payload: Uint8Array -}