diff --git a/Cargo.lock b/Cargo.lock index 4609999942..74608e16e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3569,6 +3569,7 @@ version = "0.0.1" dependencies = [ "assert_cmd", "base64 0.22.1", + "chrono", "clap", "command-extra", "derive_more", @@ -3624,6 +3625,7 @@ dependencies = [ "tempfile", "tokio", "walkdir", + "wax", "which 8.0.4", "windows-sys 0.61.2", ] diff --git a/pacquet/crates/cli/Cargo.toml b/pacquet/crates/cli/Cargo.toml index 694320f324..86098aeadf 100644 --- a/pacquet/crates/cli/Cargo.toml +++ b/pacquet/crates/cli/Cargo.toml @@ -43,6 +43,7 @@ pacquet-workspace-manifest-writer = { workspace = true } pacquet-workspace-projects-graph = { workspace = true } pacquet-workspace-state = { workspace = true } +chrono = { workspace = true } clap = { workspace = true } derive_more = { workspace = true } dialoguer = { workspace = true } @@ -62,6 +63,7 @@ tabled = { workspace = true } tokio = { workspace = true } which = { workspace = true } base64 = { workspace = true } +wax = { workspace = true } # Windows has no process groups, so a Job Object ties the lifetime of spawned # children (lifecycle scripts and their descendants) to pacquet's. See diff --git a/pacquet/crates/cli/src/cli_args.rs b/pacquet/crates/cli/src/cli_args.rs index c6f7cd5ef8..3062141c10 100644 --- a/pacquet/crates/cli/src/cli_args.rs +++ b/pacquet/crates/cli/src/cli_args.rs @@ -1,4 +1,5 @@ pub mod add; +pub mod cache; pub mod cat_file; pub mod cat_index; pub mod create; @@ -21,6 +22,7 @@ pub mod why; use crate::{State, config_deps, config_overrides::ConfigOverrides}; use add::AddArgs; +use cache::CacheCommand; use cat_file::CatFileArgs; use cat_index::CatIndexArgs; use clap::{Parser, Subcommand, ValueEnum}; @@ -168,6 +170,9 @@ pub enum CliCommand { /// Managing the package store. #[clap(subcommand)] Store(StoreCommand), + /// Inspect and manage the metadata cache. + #[clap(subcommand)] + Cache(CacheCommand), /// Prints the contents of a file based on the hash value stored in the index file. CatFile(CatFileArgs), /// Prints the index file of a specific package from the store. @@ -472,6 +477,7 @@ impl CliArgs { args.run(|| config().map(|m| &*m))?; } CliCommand::Store(command) => command.run(|| config().map(|m| &*m))?, + CliCommand::Cache(command) => command.run(config()?)?, CliCommand::CatFile(args) => { args.run(|| config().map(|m| &*m))?; } diff --git a/pacquet/crates/cli/src/cli_args/cache.rs b/pacquet/crates/cli/src/cli_args/cache.rs new file mode 100644 index 0000000000..98f72880cf --- /dev/null +++ b/pacquet/crates/cli/src/cli_args/cache.rs @@ -0,0 +1,248 @@ +use clap::Subcommand; +use indexmap::IndexMap; +use miette::IntoDiagnostic; +use pacquet_config::{Config, ResolutionMode}; +use pacquet_resolving_npm_resolver::mirror::{ + ABBREVIATED_META_DIR, FULL_FILTERED_META_DIR, get_registry_name, load_meta, +}; +use pacquet_store_dir::StoreIndex; +use serde_json::json; +use std::{ + fs, + path::{Path, PathBuf}, +}; +use wax::walk::Entry; + +#[derive(Debug, Subcommand)] +pub enum CacheCommand { + /// Lists the available packages metadata cache. Supports filtering by glob. + List { packages: Vec }, + /// Lists all registries that have their metadata cache locally. + ListRegistries, + /// Views information from the specified package's cache. + View { package: String }, + /// Deletes metadata cache for the specified package(s). Supports patterns. + Delete { packages: Vec }, +} + +impl CacheCommand { + fn meta_dir(config: &Config) -> &'static str { + if config.resolution_mode == ResolutionMode::TimeBased + && !config.registry_supports_time_field + { + FULL_FILTERED_META_DIR + } else { + ABBREVIATED_META_DIR + } + } + + fn cache_dir(config: &Config) -> PathBuf { + config.cache_dir.join(Self::meta_dir(config)) + } + + /// Filesystem-safe slug of the configured registry, used as the top-level + /// directory under the metadata cache root. A malformed registry URL is a + /// configuration error, so we surface it rather than broadening the glob + /// scope to every registry — important because `delete` is destructive. + fn registry_prefix(config: &Config) -> miette::Result { + get_registry_name(&config.registry).into_diagnostic() + } + + /// Reject names whose glob would escape the cache root. pnpm passes filter + /// arguments straight into a glob; pacquet additionally guards against `..` + /// segments so a crafted name can't match files outside the cache tree — + /// `delete` removes whatever the glob resolves to. + fn reject_path_traversal(name: &str) -> miette::Result<()> { + if name.contains("..") { + return Err(miette::miette!( + "Invalid package name '{name}': path traversal sequences are not allowed" + )); + } + Ok(()) + } + + fn find_metadata_files( + config: &Config, + cache_dir: &Path, + packages: &[String], + ) -> miette::Result> { + let registry_prefix = Self::registry_prefix(config)?; + + let patterns = if packages.is_empty() { + vec![format!("{registry_prefix}/**")] + } else { + packages + .iter() + .map(|pkg| { + Self::reject_path_traversal(pkg)?; + // Filters are matched literally, as in pnpm — they are glob + // segments, not package names, so they are not re-encoded. + Ok(format!("{registry_prefix}/{pkg}.jsonl")) + }) + .collect::>>()? + }; + + let mut matches = Vec::new(); + for pattern in patterns { + let glob = wax::Glob::new(&pattern).into_diagnostic()?; + for entry in glob.walk(cache_dir).filter_map(std::result::Result::ok) { + if !entry.file_type().is_file() { + continue; + } + if let Some(path_str) = + entry.path().strip_prefix(cache_dir).ok().and_then(|path| path.to_str()) + { + matches.push(path_str.replace('\\', "/")); + } + } + } + matches.sort(); + matches.dedup(); + Ok(matches) + } + + pub fn run(self, config: &Config) -> miette::Result<()> { + let cache_dir = Self::cache_dir(config); + + match self { + CacheCommand::ListRegistries => { + if let Ok(entries) = fs::read_dir(&cache_dir) { + let mut registries: Vec = entries + .filter_map(std::result::Result::ok) + .filter(|entry| entry.file_type().is_ok_and(|file_type| file_type.is_dir())) + .map(|entry| entry.file_name().to_string_lossy().into_owned()) + .collect(); + registries.sort(); + if !registries.is_empty() { + println!("{}", registries.join("\n")); + } + } + } + CacheCommand::List { packages } => { + if !cache_dir.exists() { + return Ok(()); + } + let meta_files = Self::find_metadata_files(config, &cache_dir, &packages)?; + if !meta_files.is_empty() { + println!("{}", meta_files.join("\n")); + } + } + CacheCommand::Delete { packages } => { + if !cache_dir.exists() { + return Ok(()); + } + let meta_files = Self::find_metadata_files(config, &cache_dir, &packages)?; + for meta_file in &meta_files { + fs::remove_file(cache_dir.join(meta_file)).into_diagnostic()?; + } + if !meta_files.is_empty() { + println!("{}", meta_files.join("\n")); + } + } + CacheCommand::View { package } => { + if !cache_dir.exists() { + println!("{{}}"); + return Ok(()); + } + Self::reject_path_traversal(&package)?; + let registry_prefix = Self::registry_prefix(config)?; + // pnpm matches the package name literally as a glob segment. + let pattern = format!("{registry_prefix}/{package}.jsonl"); + + let glob = wax::Glob::new(&pattern).into_diagnostic()?; + let mut meta_file_paths = Vec::new(); + for entry in glob.walk(&cache_dir).filter_map(std::result::Result::ok) { + if !entry.file_type().is_file() { + continue; + } + if let Some(path_str) = + entry.path().strip_prefix(&cache_dir).ok().and_then(|path| path.to_str()) + { + meta_file_paths + .push((path_str.replace('\\', "/"), entry.path().to_path_buf())); + } + } + meta_file_paths.sort(); + + // IndexMap preserves insertion order so the JSON key order is + // deterministic (driven by the sorted file paths), matching + // pnpm's plain-object output. + let mut meta_files_by_path = IndexMap::new(); + + // pnpm's cacheView opens a writable StoreIndex that creates + // index.db when absent, so a fresh/empty store reports every + // version as non-cached rather than erroring. `shared_readonly_in` + // returns None when index.db does not exist, which we treat the + // same way: every lookup is a miss. + let store_index = StoreIndex::shared_readonly_in(&config.store_dir); + let store_index = + store_index.as_ref().map(|index| index.lock().expect("store index mutex")); + + for (file_path, full_path) in meta_file_paths { + let Some(meta_object) = load_meta(&full_path) else { continue }; + let mtime = fs::metadata(&full_path).and_then(|meta| meta.modified()).ok(); + + let mut cached_versions = Vec::new(); + let mut non_cached_versions = Vec::new(); + + for (version, json_frag) in meta_object.versions.fragments() { + let Ok(manifest) = serde_json::from_str::(&json_frag) + else { + continue; + }; + let Some(integrity) = manifest + .get("dist") + .and_then(|dist| dist.get("integrity")) + .and_then(|integrity_value| integrity_value.as_str()) + else { + continue; + }; + + let key = pacquet_store_dir::store_index_key( + integrity, + &format!("{}@{}", meta_object.name, version), + ); + let is_cached = store_index + .as_ref() + .is_some_and(|index| index.contains_key(&key).unwrap_or(false)); + if is_cached { + cached_versions.push(version.clone()); + } else { + non_cached_versions.push(version.clone()); + } + } + + // The output groups versions per registry. The registry + // directory is the top-level component of the cache-relative + // path; for scoped packages the file lives one level deeper + // (`/@scope/name.jsonl`), so `parent()` would be + // wrong. Mirrors pnpm's cacheView walk to the top-most dir. + let registry_name = Path::new(&file_path).components().next().map_or_else( + || ".".to_string(), + |component| component.as_os_str().to_string_lossy().into_owned(), + ); + + meta_files_by_path.insert( + registry_name.replace('+', ":"), + json!({ + "cachedVersions": cached_versions, + "nonCachedVersions": non_cached_versions, + "cachedAt": mtime.map(|time| { + chrono::DateTime::::from(time) + .to_rfc3339_opts(chrono::SecondsFormat::Millis, true) + }), + "distTags": meta_object.dist_tags, + }), + ); + } + + println!( + "{}", + serde_json::to_string_pretty(&meta_files_by_path).into_diagnostic()?, + ); + } + } + + Ok(()) + } +} diff --git a/pacquet/crates/cli/tests/cache.rs b/pacquet/crates/cli/tests/cache.rs new file mode 100644 index 0000000000..9238106cf8 --- /dev/null +++ b/pacquet/crates/cli/tests/cache.rs @@ -0,0 +1,118 @@ +use assert_cmd::prelude::*; +use command_extra::CommandExtra; +use pacquet_testing_utils::bin::CommandTempCwd; +use std::fs; + +#[test] +fn should_list_registries() { + let cwd = CommandTempCwd::init().add_mocked_registry(); + + let cache_dir = cwd.npmrc_info.cache_dir.join("v11").join("metadata"); + fs::create_dir_all(cache_dir.join("registry.npmjs.org")).unwrap(); + fs::create_dir_all(cache_dir.join("registry.yarnpkg.com")).unwrap(); + + let output = cwd + .pacquet + .with_arg("cache") + .with_arg("list-registries") + .assert() + .success() + .get_output() + .stdout + .clone(); + + let stdout = String::from_utf8(output).unwrap(); + assert!(stdout.contains("registry.npmjs.org")); + assert!(stdout.contains("registry.yarnpkg.com")); +} + +#[test] +fn should_list_packages() { + let cwd = CommandTempCwd::init().add_mocked_registry(); + + let cache_dir = cwd.npmrc_info.cache_dir.join("v11").join("metadata"); + let url_str = cwd.npmrc_info.mock_instance.url(); + let registry_name = + pacquet_resolving_npm_resolver::mirror::get_registry_name(&url_str).unwrap(); + fs::create_dir_all(cache_dir.join(®istry_name)).unwrap(); + fs::write(cache_dir.join(®istry_name).join("is-positive.jsonl"), "{}").unwrap(); + fs::write(cache_dir.join(®istry_name).join("is-negative.jsonl"), "{}").unwrap(); + + let output = cwd + .pacquet + .with_arg("cache") + .with_arg("list") + .assert() + .success() + .get_output() + .stdout + .clone(); + + let stdout = String::from_utf8_lossy(&output); + assert!(stdout.contains(&format!("{registry_name}/is-positive.jsonl"))); + assert!(stdout.contains(&format!("{registry_name}/is-negative.jsonl"))); +} + +#[test] +fn should_list_only_files_not_directories() { + let cwd = CommandTempCwd::init().add_mocked_registry(); + + let cache_dir = cwd.npmrc_info.cache_dir.join("v11").join("metadata"); + let url_str = cwd.npmrc_info.mock_instance.url(); + let registry_name = + pacquet_resolving_npm_resolver::mirror::get_registry_name(&url_str).unwrap(); + fs::create_dir_all(cache_dir.join(®istry_name)).unwrap(); + fs::write(cache_dir.join(®istry_name).join("is-positive.jsonl"), "{}").unwrap(); + // A scoped package lives in its own directory, which the glob also matches. + // Only the file underneath it, not the directory itself, should be listed. + fs::create_dir_all(cache_dir.join(®istry_name).join("@scope")).unwrap(); + fs::write(cache_dir.join(®istry_name).join("@scope").join("foo.jsonl"), "{}").unwrap(); + + let output = cwd + .pacquet + .with_arg("cache") + .with_arg("list") + .assert() + .success() + .get_output() + .stdout + .clone(); + + let stdout = String::from_utf8_lossy(&output); + assert!(stdout.contains(&format!("{registry_name}/is-positive.jsonl"))); + assert!(stdout.contains(&format!("{registry_name}/@scope/foo.jsonl"))); + let scope_dir = format!("{registry_name}/@scope"); + assert!( + !stdout.lines().any(|line| line == scope_dir), + "directory entry {scope_dir:?} should not be listed, got: {stdout}", + ); +} + +#[test] +fn should_delete_packages() { + let cwd = CommandTempCwd::init().add_mocked_registry(); + + let cache_dir = cwd.npmrc_info.cache_dir.join("v11").join("metadata"); + let url_str = cwd.npmrc_info.mock_instance.url(); + let registry_name = + pacquet_resolving_npm_resolver::mirror::get_registry_name(&url_str).unwrap(); + fs::create_dir_all(cache_dir.join(®istry_name)).unwrap(); + fs::write(cache_dir.join(®istry_name).join("is-positive.jsonl"), "{}").unwrap(); + fs::write(cache_dir.join(®istry_name).join("is-negative.jsonl"), "{}").unwrap(); + + let output = cwd + .pacquet + .with_arg("cache") + .with_arg("delete") + .with_arg("is-positive") + .assert() + .success() + .get_output() + .stdout + .clone(); + + let stdout = String::from_utf8_lossy(&output); + assert!(stdout.contains(&format!("{registry_name}/is-positive.jsonl"))); + assert!(!cache_dir.join(®istry_name).join("is-positive.jsonl").exists()); + assert!(cache_dir.join(®istry_name).join("is-negative.jsonl").exists()); +} diff --git a/pacquet/crates/resolving-npm-resolver/src/lib.rs b/pacquet/crates/resolving-npm-resolver/src/lib.rs index 12f18cc981..31d66e29df 100644 --- a/pacquet/crates/resolving-npm-resolver/src/lib.rs +++ b/pacquet/crates/resolving-npm-resolver/src/lib.rs @@ -24,7 +24,7 @@ mod fetch_attestation_published_at; mod fetch_full_metadata; mod fetch_full_metadata_cached; mod lookup_context; -mod mirror; +pub mod mirror; mod named_registry; mod named_registry_resolver; mod npm_resolver; diff --git a/pacquet/crates/resolving-npm-resolver/src/mirror.rs b/pacquet/crates/resolving-npm-resolver/src/mirror.rs index 919a25b718..0abe57b98e 100644 --- a/pacquet/crates/resolving-npm-resolver/src/mirror.rs +++ b/pacquet/crates/resolving-npm-resolver/src/mirror.rs @@ -185,6 +185,7 @@ pub fn get_registry_name(registry: &str) -> Result /// sha256 hex suffix so case-insensitive filesystems (HFS+, NTFS by /// default) can't collide it with a lowercase sibling. Mirrors pnpm's /// [`encodePkgName`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/pickPackage.ts#L555-L560). +#[must_use] pub fn encode_pkg_name(pkg_name: &str) -> String { let lowered = pkg_name.to_lowercase(); if pkg_name == lowered { @@ -431,6 +432,7 @@ fn read_mirror_headers(file: &mut File) -> Option { /// headers, identical to pnpm's /// [`loadMetaHeaders`](https://github.com/pnpm/pnpm/blob/2a9bd897bf/resolving/npm-resolver/src/pickPackage.ts#L627-L644) /// catch-and-return-null. +#[must_use] pub fn load_meta_headers(pkg_mirror: &Path) -> Option { let mut file = File::open(pkg_mirror).ok()?; read_mirror_headers(&mut file) @@ -444,6 +446,7 @@ pub fn load_meta_headers(pkg_mirror: &Path) -> Option { /// catches any error from `readFile` / `JSON.parse` and returns /// `null`; we match that contract because the caller's response to /// "couldn't read" is the same as "no cache". +#[must_use] pub fn load_meta(pkg_mirror: &Path) -> Option { let contents = fs::read(pkg_mirror).ok()?; let newline = contents.iter().position(|&byte| byte == b'\n')?;