diff --git a/Cargo.lock b/Cargo.lock index adebe40efa..4609999942 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3615,6 +3615,7 @@ dependencies = [ "pnpr", "pretty_assertions", "rayon", + "rmp-serde", "serde", "serde-saphyr", "serde_json", diff --git a/pacquet/crates/cli/Cargo.toml b/pacquet/crates/cli/Cargo.toml index 610ab72170..694320f324 100644 --- a/pacquet/crates/cli/Cargo.toml +++ b/pacquet/crates/cli/Cargo.toml @@ -43,24 +43,25 @@ pacquet-workspace-manifest-writer = { workspace = true } pacquet-workspace-projects-graph = { workspace = true } pacquet-workspace-state = { workspace = true } -clap = { workspace = true } -derive_more = { workspace = true } -dialoguer = { workspace = true } -dunce = { workspace = true } -futures-util = { workspace = true } -home = { workspace = true } -indexmap = { workspace = true } -node-semver = { workspace = true } -miette = { workspace = true } -owo-colors = { workspace = true } -pipe-trait = { workspace = true } -rayon = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } -tabled = { workspace = true } -tokio = { workspace = true } -which = { workspace = true } -base64 = { workspace = true } +clap = { workspace = true } +derive_more = { workspace = true } +dialoguer = { workspace = true } +dunce = { workspace = true } +futures-util = { workspace = true } +home = { workspace = true } +indexmap = { workspace = true } +node-semver = { workspace = true } +miette = { workspace = true } +owo-colors.workspace = true +pipe-trait = { workspace = true } +rayon = { workspace = true } +rmp-serde = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tabled = { workspace = true } +tokio = { workspace = true } +which = { workspace = true } +base64 = { workspace = true } # Windows has no process groups, so a Job Object ties the lifetime of spawned # children (lifecycle scripts and their descendants) to pacquet's. 
See diff --git a/pacquet/crates/cli/src/cli_args.rs b/pacquet/crates/cli/src/cli_args.rs index c84b010ce5..ec300df52c 100644 --- a/pacquet/crates/cli/src/cli_args.rs +++ b/pacquet/crates/cli/src/cli_args.rs @@ -3,12 +3,14 @@ pub mod cat_file; pub mod create; pub mod dlx; pub mod exec; +pub mod find_hash; pub mod install; pub mod outdated; pub mod recursive; pub mod remove; pub mod restart; pub mod run; +pub mod sanitize; pub mod stop; pub mod store; pub mod supported_architectures; @@ -23,6 +25,7 @@ use clap::{Parser, Subcommand, ValueEnum}; use create::CreateArgs; use dlx::DlxArgs; use exec::ExecArgs; +use find_hash::FindHashArgs; use install::InstallArgs; use miette::{Context, IntoDiagnostic}; use outdated::{OutdatedArgs, OutdatedOutcome}; @@ -158,6 +161,8 @@ pub enum CliCommand { /// Restarts a package. Runs "stop", "restart", and "start" scripts, /// and associated pre- and post- scripts. Restart(RestartArgs), + /// Lists the packages that include the file with the specified hash. + FindHash(FindHashArgs), /// Managing the package store. 
#[clap(subcommand)] Store(StoreCommand), @@ -459,6 +464,9 @@ impl CliArgs { CliCommand::Restart(args) => { args.run(&dir, config()?, matches!(reporter, ReporterType::Silent))?; } + CliCommand::FindHash(args) => { + args.run(|| config().map(|m| &*m))?; + } CliCommand::Store(command) => command.run(|| config().map(|m| &*m))?, CliCommand::CatFile(args) => { args.run(|| config().map(|m| &*m))?; diff --git a/pacquet/crates/cli/src/cli_args/find_hash.rs b/pacquet/crates/cli/src/cli_args/find_hash.rs new file mode 100644 index 0000000000..673f6f6c8f --- /dev/null +++ b/pacquet/crates/cli/src/cli_args/find_hash.rs @@ -0,0 +1,239 @@ +use crate::cli_args::sanitize::sanitize; +use clap::Args; +use derive_more::{Display, Error}; +use miette::{Context, Diagnostic, IntoDiagnostic}; +use owo_colors::{OwoColorize, Rgb, Stream}; +use pacquet_config::Config; +use pacquet_store_dir::{ + decode_package_files_index, + store_index::{StoreIndex, StoreIndexError}, + transcode_to_plain_msgpack, +}; +use serde::Deserialize; +use std::collections::HashMap; + +#[derive(Debug, Display, Error, Diagnostic)] +#[non_exhaustive] +pub enum FindHashError { + #[display("No package or index file matching this hash was found.")] + #[diagnostic(code(ERR_PNPM_INVALID_FILE_HASH))] + InvalidFileHash, + + #[display("{source}")] + #[diagnostic(transparent)] + StoreIndex { + #[error(source)] + source: StoreIndexError, + }, + + #[display("Failed to decode package_index row {key:?}: {source}")] + CorruptStoreIndexRow { + key: String, + #[error(source)] + source: StoreIndexError, + }, +} + +#[derive(Debug, Args)] +pub struct FindHashArgs { + /// The hash of the file to search for. Can be a hex string or shaN-base64 format. 
+    pub hash: String,
+}
+
+impl From<StoreIndexError> for FindHashError {
+    fn from(source: StoreIndexError) -> Self {
+        Self::StoreIndex { source }
+    }
+}
+
+/// Number of hex digits in a sha512 digest.
+const EXPECTED_HEX_LENGTH: usize = 128;
+/// Number of bytes in a sha512 digest.
+const EXPECTED_SHA512_BYTES: usize = 64;
+/// Length of 64 bytes in padded base64; longer payloads are rejected before decoding.
+const MAX_SHA512_BASE64_LENGTH: usize = 88;
+
+impl FindHashArgs {
+    /// Print `name@version index-key` for each store-index row that contains the hash.
+    /// The hash is validated before `config` is called. Fails when the hash is malformed,
+    /// the index cannot be opened, read or decoded, or no row matches.
+    pub fn run<'a>(
+        self,
+        config: impl FnOnce() -> miette::Result<&'a Config>,
+    ) -> miette::Result<()> {
+        let hash = parse_hash(self.hash)?;
+
+        let config = config()?;
+        let store_dir = &config.store_dir;
+
+        let store_index = if config.frozen_store {
+            StoreIndex::open_immutable(store_dir.root())
+                .into_diagnostic()
+                .wrap_err("Failed to open store index (frozen)")?
+        } else {
+            StoreIndex::open_readonly_in(store_dir)
+                .into_diagnostic()
+                .wrap_err("Failed to open store index")?
+        };
+
+        let mut results = Vec::new();
+
+        store_index.for_each_raw(|index_key, bytes| -> Result<(), FindHashError> {
+            let data = decode_find_hash_index(&bytes).map_err(|source| {
+                FindHashError::CorruptStoreIndexRow { key: index_key.clone(), source }
+            })?;
+            if !contains_hash(&data, &hash) {
+                return Ok(());
+            }
+
+            let (name, version) = package_identity(&bytes).map_err(|source| {
+                FindHashError::CorruptStoreIndexRow { key: index_key.clone(), source }
+            })?;
+            results.push((name, version, index_key));
+            Ok(())
+        })?;
+
+        if results.is_empty() {
+            return Err(FindHashError::InvalidFileHash.into());
+        }
+
+        for (name, version, index_key) in results {
+            println!(
+                "{}@{} {}",
+                package_info(&name),
+                package_info(&version),
+                index_path(&index_key),
+            );
+        }
+
+        Ok(())
+    }
+}
+
+/// Normalize a user-supplied hash to the lowercase hex form compared against index digests.
+/// Accepts `sha512-<base64>` (padded or unpadded, algorithm name case-insensitive) or a
+/// 128-character hex string in either case. Every other input is an error, including a
+/// base64 payload that does not decode to exactly 64 bytes.
+fn parse_hash(mut hash: String) -> miette::Result<String> {
+    // Hex digits never include `-`, so a `-` selects the `<algo>-<base64>` spelling.
+    if let Some((algo, base64_part)) = hash.split_once('-') {
+        if !algo.eq_ignore_ascii_case("sha512") {
+            return Err(miette::miette!(
+                "Unsupported hash algorithm \"{algo}\". Only \"sha512\" is supported."
+            ));
+        }
+        if base64_part.len() > MAX_SHA512_BASE64_LENGTH {
+            return Err(miette::miette!(
+                "Invalid hash format: sha512 base64 payload has {} character(s), expected at most {MAX_SHA512_BASE64_LENGTH}.",
+                base64_part.len(),
+            ));
+        }
+        use base64::{Engine as _, engine::general_purpose::{STANDARD, STANDARD_NO_PAD}};
+        // Retry unpadded input; when both attempts fail, the second error is reported.
+        let decoded = STANDARD
+            .decode(base64_part)
+            .or_else(|_| STANDARD_NO_PAD.decode(base64_part))
+            .into_diagnostic()
+            .wrap_err("Failed to decode base64 hash")?;
+        if decoded.len() != EXPECTED_SHA512_BYTES {
+            return Err(miette::miette!(
+                "Decoded hash is {} bytes, expected {EXPECTED_SHA512_BYTES} bytes for sha512.",
+                decoded.len(),
+            ));
+        }
+        use std::fmt::Write as _;
+        let mut hex = String::with_capacity(decoded.len() * 2);
+        for b in decoded {
+            write!(&mut hex, "{b:02x}").into_diagnostic()?;
+        }
+        return Ok(hex);
+    }
+
+    if !hash.chars().all(|c| c.is_ascii_hexdigit()) {
+        return Err(miette::miette!(
+            "Invalid hash format: \"{hash}\" contains non-hexadecimal characters. \
+             Expected a 128-character hex string or a sha512-base64 format."
+        ));
+    }
+    if hash.len() != EXPECTED_HEX_LENGTH {
+        return Err(miette::miette!(
+            "Invalid hash format: \"{hash}\" has {} character(s), expected {EXPECTED_HEX_LENGTH}.",
+            hash.len(),
+        ));
+    }
+    hash.make_ascii_lowercase();
+    Ok(hash)
+}
+
+/// The fields of a `package_index` row that are needed to look for a digest.
+#[derive(Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct FindHashPackageIndex {
+    algo: String,
+    files: HashMap<String, FindHashFileInfo>,
+    side_effects: Option<HashMap<String, FindHashSideEffectsDiff>>,
+}
+
+/// A file entry, reduced to its digest.
+#[derive(Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct FindHashFileInfo {
+    digest: String,
+}
+
+/// A side-effects entry, reduced to the files it added.
+#[derive(Deserialize)]
+#[serde(rename_all = "camelCase")]
+struct FindHashSideEffectsDiff {
+    added: Option<HashMap<String, FindHashFileInfo>>,
+}
+
+/// Decode a raw row into the reduced [`FindHashPackageIndex`] view.
+fn decode_find_hash_index(bytes: &[u8]) -> Result<FindHashPackageIndex, StoreIndexError> {
+    let plain = transcode_to_plain_msgpack(bytes)
+        .map_err(|source| StoreIndexError::Transcode { source })?;
+    rmp_serde::from_slice(&plain).map_err(|source| StoreIndexError::Decode { source })
+}
+
+/// Whether a sha512 row lists `hash` among its files or its side-effect additions.
+fn contains_hash(data: &FindHashPackageIndex, hash: &str) -> bool {
+    data.algo == "sha512"
+        && (data.files.values().any(|file| file.digest == hash)
+            || data.side_effects.as_ref().is_some_and(|side_effects| {
+                side_effects.values().any(|side_effect| {
+                    side_effect
+                        .added
+                        .as_ref()
+                        .is_some_and(|added| added.values().any(|file| file.digest == hash))
+                })
+            }))
+}
+
+/// Read `name` and `version` from the row's manifest. A missing manifest or field, or a
+/// value `as_str` does not accept, yields `"unknown"`.
+fn package_identity(bytes: &[u8]) -> Result<(String, String), StoreIndexError> {
+    let data = decode_package_files_index(bytes)?;
+    let manifest = data.manifest.as_ref();
+    let field = |key: &str| {
+        manifest
+            .and_then(|manifest| manifest.get(key).and_then(|value| value.as_str()))
+            .map_or_else(|| "unknown".to_string(), ToString::to_string)
+    };
+    Ok((field("name"), field("version")))
+}
+
+/// Color a package name/version like pnpm's `PACKAGE_INFO_CLR = chalk.greenBright`.
+/// `chalk` suppresses color when stdout is not a TTY, so this only emits ANSI
+/// when stdout supports color.
+fn package_info(text: &str) -> String {
+    sanitize(text).as_ref().if_supports_color(Stream::Stdout, |t| t.bright_green()).to_string()
+}
+
+/// Color an index key like pnpm's `INDEX_PATH_CLR = chalk.hex('#078487')`
+/// (`#078487` is `rgb(7, 132, 135)`). See [`package_info`] for the TTY behavior.
+fn index_path(text: &str) -> String {
+    sanitize(text)
+        .as_ref()
+        .if_supports_color(Stream::Stdout, |t| t.color(Rgb(7, 132, 135)))
+        .to_string()
+}
diff --git a/pacquet/crates/cli/src/cli_args/sanitize.rs b/pacquet/crates/cli/src/cli_args/sanitize.rs
new file mode 100644
index 0000000000..b91deb5604
--- /dev/null
+++ b/pacquet/crates/cli/src/cli_args/sanitize.rs
@@ -0,0 +1,43 @@
+use std::borrow::Cow;
+
+/// Strip control characters from store-derived text before it reaches the
+/// terminal, keeping `\n` and `\t`. Prevents stored metadata from emitting
+/// raw escape sequences to the user's terminal.
+///
+/// "Control character" means [`char::is_control`]: the C0 range
+/// `U+0000..=U+001F`, DEL (`U+007F`) and the C1 range `U+0080..=U+009F`.
+/// The decision to allocate uses the same predicate as the filter, so DEL
+/// and C1 controls such as `U+009B` (single-character CSI) are removed even
+/// when the text contains no C0 byte.
+///
+/// Returns [`Cow::Borrowed`] when nothing has to be removed.
+pub fn sanitize(text: &str) -> Cow<'_, str> {
+    /// Whether `character` must be removed before printing.
+    fn is_stripped(character: char) -> bool {
+        character.is_control() && !matches!(character, '\n' | '\t')
+    }
+
+    if text.chars().any(is_stripped) {
+        Cow::Owned(text.chars().filter(|&character| !is_stripped(character)).collect())
+    } else {
+        Cow::Borrowed(text)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::sanitize;
+    use std::borrow::Cow;
+
+    #[test]
+    fn clean_text_is_borrowed() {
+        assert!(matches!(sanitize("is-odd@3.0.1\tok\n"), Cow::Borrowed(_)));
+    }
+
+    #[test]
+    fn strips_c0_del_and_c1_controls() {
+        assert_eq!(sanitize("a\u{1b}[31mb"), "a[31mb");
+        assert_eq!(sanitize("a\u{7f}b"), "ab");
+        assert_eq!(sanitize("a\u{9b}31mb"), "a31mb");
+    }
+}
diff --git a/pacquet/crates/cli/src/cli_args/why.rs b/pacquet/crates/cli/src/cli_args/why.rs
index 45be6b8d14..3588ec69ef 100644
--- a/pacquet/crates/cli/src/cli_args/why.rs
+++ b/pacquet/crates/cli/src/cli_args/why.rs
@@ -6,7 +6,7 @@
 //! [`buildDependentsTree`](https://github.com/pnpm/pnpm/blob/deps/inspection/tree-builder/src/buildDependentsTree.ts).
 //!
 
-use crate::State;
+use crate::{State, cli_args::sanitize::sanitize};
 use clap::Args;
 use owo_colors::{OwoColorize, Stream};
 use pacquet_config::matcher::{Matcher, create_matcher};
@@ -488,19 +488,5 @@ fn dim(text: &str) -> String {
     cleaned.as_ref().if_supports_color(Stream::Stdout, |t| t.dimmed()).to_string()
 }
 
-fn sanitize(text: &str) -> std::borrow::Cow<'_, str> {
-    if text.bytes().any(|byte| byte < 0x20 && byte != b'\n' && byte != b'\t') {
-        std::borrow::Cow::Owned(
-            text.chars()
-                .filter(|character| {
-                    !character.is_control() || *character == '\n' || *character == '\t'
-                })
-                .collect(),
-        )
-    } else {
-        std::borrow::Cow::Borrowed(text)
-    }
-}
-
 #[cfg(test)]
 mod tests;
diff --git a/pacquet/crates/cli/tests/find_hash.rs b/pacquet/crates/cli/tests/find_hash.rs
new file mode 100644
index 0000000000..885ee86655
--- /dev/null
+++ b/pacquet/crates/cli/tests/find_hash.rs
@@ -0,0 +1,141 @@
+use assert_cmd::prelude::*;
+use pacquet_store_dir::store_index::StoreIndex;
+use pacquet_testing_utils::bin::CommandTempCwd;
+
+/// Return `(digest, name, version)` for the first indexed package whose manifest
+/// has a string `name` and `version` and which lists at least one file.
+fn find_hash_fixture(store_index: &StoreIndex) -> (String, String, String) {
+    let keys = store_index.keys().unwrap();
+    assert!(!keys.is_empty(), "Store index should have at least one key");
+
+    let entries = store_index.get_many(&keys).unwrap();
+    for (_key, data) in entries {
+        let Some(manifest) = &data.manifest else { continue };
+        let Some(expected_name) = manifest.get("name").and_then(|value| value.as_str()) else {
+            continue;
+        };
+        let Some(expected_version) = manifest.get("version").and_then(|value| value.as_str())
+        else {
+            continue;
+        };
+        if let Some(file) = data.files.values().next() {
+            return (file.digest.clone(), expected_name.to_string(), expected_version.to_string());
+        }
+    }
+
+    panic!("Should find a package hash with a non-empty name@version in the store index");
+}
+
+/// Build a fresh `pacquet` command rooted at `dir`, so the arguments of an earlier
+/// invocation do not carry over.
+fn pacquet_in(dir: impl AsRef<std::path::Path>) -> std::process::Command {
+    let mut command = std::process::Command::cargo_bin("pacquet").unwrap();
+    command.current_dir(dir);
+    command
+}
+
+#[test]
+fn find_hash_works() {
+    let CommandTempCwd { mut pacquet, workspace, root: _root, npmrc_info, .. } =
+        CommandTempCwd::init().add_mocked_registry();
+
+    // 1. Install a package to populate the store index
+    pacquet.arg("add").arg("is-odd@3.0.1").assert().success();
+
+    let store_dir = pacquet_store_dir::StoreDir::from(npmrc_info.store_dir);
+    let store_index = StoreIndex::open_readonly_in(&store_dir).unwrap();
+    let (valid_hash, expected_name, expected_version) = find_hash_fixture(&store_index);
+
+    // 2. Run find-hash with the valid hash
+    let output = pacquet_in(&workspace).arg("find-hash").arg(&valid_hash).assert().success();
+    let stdout = String::from_utf8_lossy(&output.get_output().stdout);
+
+    println!("STDOUT: {stdout}");
+
+    // Output should contain the package name and version we extracted the hash from
+    assert!(stdout.contains(&expected_name), "Expected stdout to contain name {expected_name}");
+    assert!(
+        stdout.contains(&expected_version),
+        "Expected stdout to contain version {expected_version}",
+    );
+}
+
+#[test]
+fn should_fail_on_missing_hash() {
+    let CommandTempCwd { mut pacquet, workspace, root: _root, .. } =
+        CommandTempCwd::init().add_mocked_registry();
+    // Install a package first so the store index exists.
+    pacquet.arg("add").arg("is-odd@3.0.1").assert().success();
+    // A well-formed sha512 hex digest that no file matches.
+    let missing_hash = "f".repeat(128);
+    let output = pacquet_in(&workspace).arg("find-hash").arg(&missing_hash).assert().failure();
+    let stderr = String::from_utf8_lossy(&output.get_output().stderr);
+    assert!(stderr.contains("ERR_PNPM_INVALID_FILE_HASH"));
+}
+
+#[test]
+fn should_fail_on_invalid_base64() {
+    let CommandTempCwd { mut pacquet, root: _root, .. } =
+        CommandTempCwd::init().add_mocked_registry();
+    let output = pacquet.arg("find-hash").arg("sha512-InvalidBase64!!!").assert().failure();
+    let stderr = String::from_utf8_lossy(&output.get_output().stderr);
+    assert!(stderr.contains("Failed to decode base64 hash"));
+}
+
+#[test]
+fn should_fail_on_oversized_base64() {
+    let CommandTempCwd { mut pacquet, root: _root, .. } =
+        CommandTempCwd::init().add_mocked_registry();
+    let hash = format!("sha512-{}", "A".repeat(1_000));
+    let output = pacquet.arg("find-hash").arg(hash).assert().failure();
+    let stderr = String::from_utf8_lossy(&output.get_output().stderr);
+    assert!(stderr.contains("sha512 base64 payload has 1000 character(s)"));
+}
+
+#[test]
+fn should_fail_on_unsupported_algorithm() {
+    let CommandTempCwd { mut pacquet, root: _root, .. } =
+        CommandTempCwd::init().add_mocked_registry();
+    let output = pacquet.arg("find-hash").arg("sha256-AAAA").assert().failure();
+    let stderr = String::from_utf8_lossy(&output.get_output().stderr);
+    assert!(stderr.contains("Unsupported hash algorithm"));
+}
+
+#[test]
+fn should_fail_on_wrong_length_hex() {
+    let CommandTempCwd { mut pacquet, root: _root, .. } =
+        CommandTempCwd::init().add_mocked_registry();
+    let output = pacquet.arg("find-hash").arg("abc123").assert().failure();
+    let stderr = String::from_utf8_lossy(&output.get_output().stderr);
+    assert!(stderr.contains("has 6 character(s)"));
+}
+
+#[test]
+fn find_hash_works_with_base64() {
+    let CommandTempCwd { mut pacquet, workspace, root: _root, npmrc_info, .. } =
+        CommandTempCwd::init().add_mocked_registry();
+
+    pacquet.arg("add").arg("is-odd@3.0.1").assert().success();
+
+    let store_dir = pacquet_store_dir::StoreDir::from(npmrc_info.store_dir);
+    let store_index = StoreIndex::open_readonly_in(&store_dir).unwrap();
+    let (hex_hash, expected_name, expected_version) = find_hash_fixture(&store_index);
+
+    // Convert hex to base64
+    use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
+    let bytes = (0..hex_hash.len())
+        .step_by(2)
+        .map(|i| u8::from_str_radix(&hex_hash[i..i + 2], 16).unwrap())
+        .collect::<Vec<u8>>();
+    let base64_hash = format!("sha512-{}", BASE64.encode(&bytes));
+
+    let output = pacquet_in(&workspace).arg("find-hash").arg(&base64_hash).assert().success();
+    let stdout = String::from_utf8_lossy(&output.get_output().stdout);
+
+    println!("STDOUT: {stdout}");
+    assert!(stdout.contains(&expected_name), "Expected stdout to contain name {expected_name}");
+    assert!(
+        stdout.contains(&expected_version),
+        "Expected stdout to contain version {expected_version}",
+    );
+}
diff --git 
a/pacquet/crates/store-dir/src/lib.rs b/pacquet/crates/store-dir/src/lib.rs
index bb2b8fa4fb..7d99b29c4f 100644
--- a/pacquet/crates/store-dir/src/lib.rs
+++ b/pacquet/crates/store-dir/src/lib.rs
@@ -5,7 +5,7 @@ mod msgpackr_records;
 mod project_registry;
 mod prune;
 mod store_dir;
-mod store_index;
+pub mod store_index;
 mod upload;
 
 pub use add_files_from_dir::*;
diff --git a/pacquet/crates/store-dir/src/store_index.rs b/pacquet/crates/store-dir/src/store_index.rs
index 936e5df0c0..c3b556d4a9 100644
--- a/pacquet/crates/store-dir/src/store_index.rs
+++ b/pacquet/crates/store-dir/src/store_index.rs
@@ -649,6 +649,40 @@ impl StoreIndex {
         Ok(out)
     }
 
+    /// Visit every raw `package_index` row without first collecting the
+    /// full key set. This mirrors pnpm's `StoreIndex.entries()` shape while
+    /// leaving decode policy to the caller.
+    ///
+    /// `visit` receives each row's key and undecoded `data` column. Rows come
+    /// back in whatever order SQLite yields them (the query has no `ORDER BY`).
+    ///
+    /// # Errors
+    ///
+    /// SQLite failures are reported as [`StoreIndexError::Read`] converted into
+    /// `VisitError`. The first error returned by `visit` stops the iteration and
+    /// is returned unchanged.
+    pub fn for_each_raw<VisitError>(
+        &self,
+        mut visit: impl FnMut(String, Vec<u8>) -> Result<(), VisitError>,
+    ) -> Result<(), VisitError>
+    where
+        VisitError: From<StoreIndexError>,
+    {
+        // One mapping for every SQLite failure, lifted into the caller's error type.
+        let read_error = |source| VisitError::from(StoreIndexError::Read { source });
+
+        let mut stmt =
+            self.conn.prepare("SELECT key, data FROM package_index").map_err(read_error)?;
+        let rows = stmt
+            .query_map([], |row| Ok((row.get::<_, String>(0)?, row.get::<_, Vec<u8>>(1)?)))
+            .map_err(read_error)?;
+        for row in rows {
+            let (key, data) = row.map_err(read_error)?;
+            visit(key, data)?;
+        }
+        Ok(())
+    }
+
     /// Batched existence probe: the subset of `keys` that have a row in
     /// `package_index`. Same chunked `WHERE key IN` shape (and SQL-injection
     /// posture) as [`Self::get_many_raw`], but selects only the key column,
diff --git a/pacquet/crates/store-dir/src/store_index/tests.rs b/pacquet/crates/store-dir/src/store_index/tests.rs
index 7b2cdd80bd..43af35c9c1 100644
--- a/pacquet/crates/store-dir/src/store_index/tests.rs
+++ b/pacquet/crates/store-dir/src/store_index/tests.rs
@@ -1,6 +1,6 @@
 use super::{
-    CafsFileInfo, GET_MANY_CHUNK, PackageFilesIndex, StoreIndex, git_hosted_store_index_key,
-    immutable_sqlite_uri, pick_store_index_key, store_index_key,
+    CafsFileInfo, GET_MANY_CHUNK, PackageFilesIndex, StoreIndex, StoreIndexError,
+    git_hosted_store_index_key, immutable_sqlite_uri, pick_store_index_key, store_index_key,
 };
 use crate::StoreDir;
 use pretty_assertions::assert_eq;
@@ -254,6 +254,30 @@ fn get_many_all_hit_returns_every_row() {
     }
 }
 
+#[test]
+fn for_each_raw_visits_every_row() {
+    let dir = tempdir().unwrap();
+    let idx = StoreIndex::open(dir.path()).unwrap();
+    let payload = sample_index();
+    let mut keys: Vec<String> =
+        (0..3).map(|index| store_index_key("sha512-x", &format!("pkg{index}@1.0.0"))).collect();
+    for key in &keys {
+        idx.set(key, &payload).unwrap();
+    }
+
+    let mut visited = Vec::new();
+    idx.for_each_raw(|key, data| {
+        assert!(!data.is_empty());
+        visited.push(key);
+        Ok::<(), StoreIndexError>(())
+    })
+    .unwrap();
+
+    keys.sort();
+    visited.sort();
+    assert_eq!(visited, keys);
+}
+
 #[test]
 fn get_many_mixed_hit_and_miss_returns_only_hits() {
     let dir = tempdir().unwrap();