fix(pacquet/resolving-npm-resolver): forward etag/modified on upgrade fetch

When `minimumReleaseAge` is active, the resolver fetches abbreviated
metadata first and then upgrades to full metadata to read each
version's `time` field. Pacquet's upgrade fetch went out
unconditionally — no `If-None-Match` / `If-Modified-Since` — so
every triggered upgrade redownloaded the full packument body even
when the registry's full-form representation hadn't changed.

Upstream pnpm forwards `meta.etag` and `meta.modified` to the
upgrade fetch and treats `304 Not Modified` as "keep the
abbreviated meta and fall through to the picker's warn-and-skip
path" (`resolving/npm-resolver/src/pickPackage.ts:488-499`).

This change matches that:

- `FetchFullMetadataOptions` gains `etag` and `modified` fields.
- `fetch_full_metadata` honours them as conditional headers and
  returns a `FetchFullMetadataOutcome::{ Modified(Box<Package>),
  NotModified }` enum so callers can react to a 304.
- `maybe_upgrade_abbreviated_meta_for_release_age` forwards the
  abbreviated meta's etag/modified into the upgrade fetch and, on
  `NotModified`, returns the original meta untouched — same
  short-circuit as upstream's `notModified` arm.

Test: `fetch_full_metadata_returns_not_modified_on_304` asserts the
request carries the expected conditional headers and that a 304
response surfaces as `NotModified` without a body.

This is a correctness/parity fix; the user's benchmark showed it
doesn't move the wall-clock for their hot-cache scenario, which is
expected — once the abbreviated mirror has been written with the
upgraded full meta (carrying `time`), subsequent runs short-circuit
at the `meta.time.is_some()` guard before ever issuing the upgrade
fetch. The hot path is still the resolve walk itself; we'll keep
looking for the remaining gap separately.
This commit is contained in:
Zoltan Kochan
2026-05-21 22:13:02 +02:00
parent 386a90b5c1
commit 58f49c9056
4 changed files with 152 additions and 17 deletions

View File

@@ -22,6 +22,7 @@
use pacquet_network::{AuthHeaders, ThrottledClient};
use pacquet_registry::Package;
use reqwest::{StatusCode, header};
use crate::{FetchMetadataError, registry_url::to_registry_url};
@@ -36,8 +37,8 @@ pub(crate) const ACCEPT_ABBREVIATED_DOC: &str =
"application/vnd.npm.install-v1+json; q=1.0, application/json; q=0.8, */*";
/// Options bundle for [`fetch_full_metadata`]. Mirrors upstream's
/// `FetchFullMetadataCachedOptions` minus the cache fields; the
/// cached variant layers them on.
/// `FetchFullMetadataCachedOptions` minus the cache-directory field;
/// the cached variant layers it on.
#[derive(Debug, Clone)]
pub struct FetchFullMetadataOptions<'a> {
pub registry: &'a str,
@@ -48,15 +49,56 @@ pub struct FetchFullMetadataOptions<'a> {
/// `install-v1` form. Mirrors upstream's
/// [`opts.fullMetadata`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/fetch.ts#L113).
pub full_metadata: bool,
/// Optional `If-None-Match` header value. When `Some`, the
/// registry can answer the request with `304 Not Modified` and
/// the fetcher returns [`FetchFullMetadataOutcome::NotModified`]
/// instead of a body. Mirrors upstream's
/// [`etag` option](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/fetch.ts#L113)
/// passed verbatim into the `make-fetch-happen` request.
pub etag: Option<&'a str>,
/// Optional `If-Modified-Since` header value. Same role as
/// [`Self::etag`] — gives the registry a chance to short-circuit
/// the body re-download. Mirrors upstream's `modified` option at
/// the same call site.
pub modified: Option<&'a str>,
}
/// Outcome of a [`fetch_full_metadata`] call. Mirrors upstream's
/// [`FetchMetadataResult | FetchMetadataNotModifiedResult`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/fetch.ts#L80-L86)
/// union — the caller (today: only
/// [`maybe_upgrade_abbreviated_meta_for_release_age`](crate::pick_package))
/// reacts differently to a 304 than to a 200. [`Package`] is boxed
/// so the size of the enum stays small even though a full packument
/// can be many KB; mirrors the same boxing pattern used elsewhere in
/// the crate when a large struct sits next to a unit variant.
#[derive(Debug, Clone)]
pub enum FetchFullMetadataOutcome {
/// Registry returned a 2xx with a parsed body.
Modified(Box<Package>),
/// Registry returned `304 Not Modified` because the conditional
/// headers matched. Callers keep the meta they already have.
NotModified,
}
/// Fetch the registry metadata document for `pkg_name`. The
/// `full_metadata` flag on [`FetchFullMetadataOptions`] picks
/// between the full and abbreviated packument forms.
///
/// When [`FetchFullMetadataOptions::etag`] or
/// [`FetchFullMetadataOptions::modified`] is set, the request
/// includes `If-None-Match` / `If-Modified-Since` headers and the
/// registry may answer `304 Not Modified` — the fetcher returns
/// [`FetchFullMetadataOutcome::NotModified`] without a body in that
/// case. Mirrors upstream's
/// [`fetchFromRegistry`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/network/fetch/src/fetchFromRegistry.ts#L41-L86)
/// 304 short-circuit, used by
/// [`maybeUpgradeAbbreviatedMetaForReleaseAge`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/pickPackage.ts#L488-L499)
/// so the upgrade fetch coalesces against the registry's
/// representation cache.
pub async fn fetch_full_metadata(
pkg_name: &str,
opts: &FetchFullMetadataOptions<'_>,
) -> Result<Package, FetchMetadataError> {
) -> Result<FetchFullMetadataOutcome, FetchMetadataError> {
// Format once and reuse for the request, the auth-header lookup,
// and the error mapper. Mirrors upstream's
// [`toUri`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/fetch.ts)
@@ -66,14 +108,24 @@ pub async fn fetch_full_metadata(
let url = to_registry_url(opts.registry, pkg_name);
let accept = if opts.full_metadata { ACCEPT_FULL_DOC } else { ACCEPT_ABBREVIATED_DOC };
let mut request =
opts.http_client.acquire_for_url(&url).await.get(&url).header("accept", accept);
opts.http_client.acquire_for_url(&url).await.get(&url).header(header::ACCEPT, accept);
if let Some(value) = opts.auth_headers.for_url(&url) {
request = request.header("authorization", value);
request = request.header(header::AUTHORIZATION, value);
}
if let Some(etag) = opts.etag {
request = request.header(header::IF_NONE_MATCH, etag);
}
if let Some(modified) = opts.modified {
request = request.header(header::IF_MODIFIED_SINCE, modified);
}
let response = request
.send()
.await
.map_err(|error| FetchMetadataError::Network { url: url.clone(), error })?
.map_err(|error| FetchMetadataError::Network { url: url.clone(), error })?;
if response.status() == StatusCode::NOT_MODIFIED {
return Ok(FetchFullMetadataOutcome::NotModified);
}
let response = response
.error_for_status()
.map_err(|error| FetchMetadataError::Network { url: url.clone(), error })?;
// Decode in two steps so a JSON-shape mismatch surfaces as
@@ -85,8 +137,9 @@ pub async fn fetch_full_metadata(
.text()
.await
.map_err(|error| FetchMetadataError::Network { url: url.clone(), error })?;
serde_json::from_str(&raw_body)
.map_err(|error| FetchMetadataError::Decode { url: url.clone(), error })
let meta: Package = serde_json::from_str(&raw_body)
.map_err(|error| FetchMetadataError::Decode { url: url.clone(), error })?;
Ok(FetchFullMetadataOutcome::Modified(Box::new(meta)))
}
#[cfg(test)]

View File

@@ -1,6 +1,18 @@
use pacquet_network::{AuthHeaders, ThrottledClient};
use super::{FetchFullMetadataOptions, fetch_full_metadata};
use super::{FetchFullMetadataOptions, FetchFullMetadataOutcome, fetch_full_metadata};
/// Unwrap a [`FetchFullMetadataOutcome::Modified`], panicking on
/// `NotModified`. Used by the success-path tests below where the
/// mock always responds 200.
fn expect_modified(outcome: FetchFullMetadataOutcome) -> pacquet_registry::Package {
match outcome {
FetchFullMetadataOutcome::Modified(pkg) => *pkg,
FetchFullMetadataOutcome::NotModified => {
panic!("expected Modified outcome, got NotModified")
}
}
}
/// Fetches against a real `mockito` server that asserts the request
/// arrives with the *full*-metadata `Accept` header
@@ -57,9 +69,12 @@ async fn fetch_full_metadata_targets_full_endpoint_with_auth() {
http_client: &http_client,
auth_headers: &auth_headers,
full_metadata: true,
etag: None,
modified: None,
};
let pkg = fetch_full_metadata("acme", &opts).await.expect("server returns 200");
let pkg =
expect_modified(fetch_full_metadata("acme", &opts).await.expect("server returns 200"));
assert_eq!(pkg.name, "acme");
assert_eq!(pkg.published_at("1.0.0"), Some("2025-01-10T08:30:00.000Z"));
let version = pkg.versions.get("1.0.0").expect("version present");
@@ -86,6 +101,8 @@ async fn fetch_full_metadata_surfaces_5xx_as_network_error() {
http_client: &http_client,
auth_headers: &auth_headers,
full_metadata: true,
etag: None,
modified: None,
};
let err = fetch_full_metadata("acme", &opts).await.expect_err("503 must surface");
@@ -139,10 +156,13 @@ async fn fetch_full_metadata_encodes_scoped_name() {
http_client: &http_client,
auth_headers: &auth_headers,
full_metadata: true,
etag: None,
modified: None,
};
let pkg =
fetch_full_metadata("@scope/pkg", &opts).await.expect("encoded scoped name reaches mock");
let pkg = expect_modified(
fetch_full_metadata("@scope/pkg", &opts).await.expect("encoded scoped name reaches mock"),
);
assert_eq!(pkg.name, "@scope/pkg");
mock.assert_async().await;
}
@@ -171,6 +191,8 @@ async fn fetch_full_metadata_surfaces_decode_failure_distinctly() {
http_client: &http_client,
auth_headers: &auth_headers,
full_metadata: true,
etag: None,
modified: None,
};
let err = fetch_full_metadata("acme", &opts).await.expect_err("malformed JSON must surface");
@@ -180,3 +202,43 @@ async fn fetch_full_metadata_surfaces_decode_failure_distinctly() {
);
mock.assert_async().await;
}
/// When the caller supplies `etag` / `modified`, the request carries
/// `If-None-Match` / `If-Modified-Since` and a registry `304` answers
/// with [`FetchFullMetadataOutcome::NotModified`] instead of a body.
/// Matches upstream's
/// [`notModified`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/network/fetch/src/fetchFromRegistry.ts#L41-L86)
/// short-circuit, which `maybeUpgradeAbbreviatedMetaForReleaseAge`
/// relies on to coalesce the upgrade fetch against the registry's
/// representation cache.
#[tokio::test]
async fn fetch_full_metadata_returns_not_modified_on_304() {
let mut server = mockito::Server::new_async().await;
let mock = server
.mock("GET", "/acme")
.match_header("if-none-match", r#"W/"fresh""#)
.match_header("if-modified-since", "Wed, 15 Jan 2025 12:00:00 GMT")
.with_status(304)
.expect(1)
.create_async()
.await;
let registry = format!("{}/", server.url());
let http_client = ThrottledClient::default();
let auth_headers = AuthHeaders::default();
let opts = FetchFullMetadataOptions {
registry: &registry,
http_client: &http_client,
auth_headers: &auth_headers,
full_metadata: true,
etag: Some(r#"W/"fresh""#),
modified: Some("Wed, 15 Jan 2025 12:00:00 GMT"),
};
let outcome = fetch_full_metadata("acme", &opts).await.expect("304 must succeed");
assert!(
matches!(outcome, FetchFullMetadataOutcome::NotModified),
"expected NotModified, got: {outcome:?}",
);
mock.assert_async().await;
}

View File

@@ -42,7 +42,9 @@ pub use create_npm_resolution_verifier::{
};
pub use errors::FetchMetadataError;
pub use fetch_attestation_published_at::{FetchAttestationOptions, fetch_attestation_published_at};
pub use fetch_full_metadata::{FetchFullMetadataOptions, fetch_full_metadata};
pub use fetch_full_metadata::{
FetchFullMetadataOptions, FetchFullMetadataOutcome, fetch_full_metadata,
};
pub use fetch_full_metadata_cached::{FetchFullMetadataCachedOptions, fetch_full_metadata_cached};
pub use mirror::{ABBREVIATED_META_DIR, FULL_META_DIR};
pub use named_registry::{

View File

@@ -67,8 +67,8 @@ use pacquet_resolving_resolver_base::VersionSelectors;
use tokio::sync::Semaphore;
use crate::{
FetchFullMetadataCachedOptions, FetchFullMetadataOptions, FetchMetadataError,
fetch_full_metadata, fetch_full_metadata_cached,
FetchFullMetadataCachedOptions, FetchFullMetadataOptions, FetchFullMetadataOutcome,
FetchMetadataError, fetch_full_metadata, fetch_full_metadata_cached,
mirror::{
ABBREVIATED_META_DIR, FULL_META_DIR, get_pkg_mirror_path, load_meta, prepare_json_for_disk,
save_meta,
@@ -953,6 +953,13 @@ struct UpgradeOutcome {
/// shape as upstream's `persistUpgradedMeta`, which intentionally
/// updates the *abbreviated* cache file with full data so the next
/// install sees `time` populated and skips the upgrade fetch.
///
/// The upgrade fetch forwards `meta.etag` and `meta.modified` as
/// conditional headers. When the registry's full-form representation
/// hasn't changed it answers `304 Not Modified` and the abbreviated
/// meta is returned untouched — matches upstream's `notModified`
/// short-circuit at
/// [`pickPackage.ts#L488-L499`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/pickPackage.ts#L488-L499).
async fn maybe_upgrade_abbreviated_meta_for_release_age<Cache: PackageMetaCache>(
ctx: &PickPackageContext<'_, Cache>,
spec: &RegistryPackageSpec,
@@ -991,9 +998,20 @@ async fn maybe_upgrade_abbreviated_meta_for_release_age<Cache: PackageMetaCache>
http_client: ctx.http_client,
auth_headers: ctx.auth_headers,
full_metadata: true,
etag: meta.etag.as_deref(),
modified: meta.modified.as_deref(),
};
let upgraded = fetch_full_metadata(&spec.name, &fetch_opts).await?;
Ok(UpgradeOutcome { meta: upgraded, upgraded: true })
match fetch_full_metadata(&spec.name, &fetch_opts).await? {
FetchFullMetadataOutcome::Modified(upgraded) => {
Ok(UpgradeOutcome { meta: *upgraded, upgraded: true })
}
// 304: the full-form representation matched the conditional
// headers, so the abbreviated meta is still the freshest
// signal we have. Keep it and let the downstream picker
// fall through to its warn-and-skip path on the missing
// `time` map — mirrors upstream's `notModified` arm.
FetchFullMetadataOutcome::NotModified => Ok(UpgradeOutcome { meta, upgraded: false }),
}
}
/// Write the upgraded full metadata back to `pkg_mirror` (which