feat(pacquet): implement find-hash (#12552)

- Adds the `find-hash` subcommand in `pacquet/crates/cli/src/cli_args/find_hash.rs`.
- Scans every `StoreIndex` row for a `PackageFilesIndex` containing a file whose `digest` matches the given hash.
- Outputs the name, version, and index key of each matching package.

---------

Co-authored-by: Zoltan Kochan <z@kochan.io>
This commit is contained in:
Alessio Attilio
2026-06-22 03:30:23 +02:00
committed by GitHub
parent 4d3fe4b495
commit e72b482b6f
10 changed files with 457 additions and 36 deletions

1
Cargo.lock generated
View File

@@ -3615,6 +3615,7 @@ dependencies = [
"pnpr",
"pretty_assertions",
"rayon",
"rmp-serde",
"serde",
"serde-saphyr",
"serde_json",

View File

@@ -43,24 +43,25 @@ pacquet-workspace-manifest-writer = { workspace = true }
pacquet-workspace-projects-graph = { workspace = true }
pacquet-workspace-state = { workspace = true }
clap = { workspace = true }
derive_more = { workspace = true }
dialoguer = { workspace = true }
dunce = { workspace = true }
futures-util = { workspace = true }
home = { workspace = true }
indexmap = { workspace = true }
node-semver = { workspace = true }
miette = { workspace = true }
owo-colors = { workspace = true }
pipe-trait = { workspace = true }
rayon = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tabled = { workspace = true }
tokio = { workspace = true }
which = { workspace = true }
base64 = { workspace = true }
clap = { workspace = true }
derive_more = { workspace = true }
dialoguer = { workspace = true }
dunce = { workspace = true }
futures-util = { workspace = true }
home = { workspace = true }
indexmap = { workspace = true }
node-semver = { workspace = true }
miette = { workspace = true }
owo-colors.workspace = true
pipe-trait = { workspace = true }
rayon = { workspace = true }
rmp-serde = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tabled = { workspace = true }
tokio = { workspace = true }
which = { workspace = true }
base64 = { workspace = true }
# Windows has no process groups, so a Job Object ties the lifetime of spawned
# children (lifecycle scripts and their descendants) to pacquet's. See

View File

@@ -3,12 +3,14 @@ pub mod cat_file;
pub mod create;
pub mod dlx;
pub mod exec;
pub mod find_hash;
pub mod install;
pub mod outdated;
pub mod recursive;
pub mod remove;
pub mod restart;
pub mod run;
pub mod sanitize;
pub mod stop;
pub mod store;
pub mod supported_architectures;
@@ -23,6 +25,7 @@ use clap::{Parser, Subcommand, ValueEnum};
use create::CreateArgs;
use dlx::DlxArgs;
use exec::ExecArgs;
use find_hash::FindHashArgs;
use install::InstallArgs;
use miette::{Context, IntoDiagnostic};
use outdated::{OutdatedArgs, OutdatedOutcome};
@@ -158,6 +161,8 @@ pub enum CliCommand {
/// Restarts a package. Runs "stop", "restart", and "start" scripts,
/// and associated pre- and post- scripts.
Restart(RestartArgs),
/// Lists the packages that include the file with the specified hash.
FindHash(FindHashArgs),
/// Managing the package store.
#[clap(subcommand)]
Store(StoreCommand),
@@ -459,6 +464,9 @@ impl CliArgs {
CliCommand::Restart(args) => {
args.run(&dir, config()?, matches!(reporter, ReporterType::Silent))?;
}
CliCommand::FindHash(args) => {
args.run(|| config().map(|m| &*m))?;
}
CliCommand::Store(command) => command.run(|| config().map(|m| &*m))?,
CliCommand::CatFile(args) => {
args.run(|| config().map(|m| &*m))?;

View File

@@ -0,0 +1,239 @@
use crate::cli_args::sanitize::sanitize;
use clap::Args;
use derive_more::{Display, Error};
use miette::{Context, Diagnostic, IntoDiagnostic};
use owo_colors::{OwoColorize, Rgb, Stream};
use pacquet_config::Config;
use pacquet_store_dir::{
decode_package_files_index,
store_index::{StoreIndex, StoreIndexError},
transcode_to_plain_msgpack,
};
use serde::Deserialize;
use std::collections::HashMap;
/// Errors that `find-hash` can report.
#[derive(Debug, Display, Error, Diagnostic)]
#[non_exhaustive]
pub enum FindHashError {
/// The scan finished without any row containing the requested digest.
#[display("No package or index file matching this hash was found.")]
#[diagnostic(code(ERR_PNPM_INVALID_FILE_HASH))]
InvalidFileHash,
/// A store-index failure that is not tied to one specific row; the
/// wrapped error's own diagnostic is forwarded unchanged.
#[display("{source}")]
#[diagnostic(transparent)]
StoreIndex {
#[error(source)]
source: StoreIndexError,
},
/// A row was read but could not be decoded; `key` names the offending row.
#[display("Failed to decode package_index row {key:?}: {source}")]
CorruptStoreIndexRow {
key: String,
#[error(source)]
source: StoreIndexError,
},
}
// Command-line arguments of the `find-hash` subcommand.
// (Plain `//` comments on purpose: clap's derive reads `///` doc comments as help text.)
#[derive(Debug, Args)]
pub struct FindHashArgs {
/// The hash of the file to search for. Can be a hex string or shaN-base64 format.
pub hash: String,
}
impl From<StoreIndexError> for FindHashError {
fn from(source: StoreIndexError) -> Self {
Self::StoreIndex { source }
}
}
/// Length of a sha512 digest written as hex: 64 bytes, two characters each.
const EXPECTED_HEX_LENGTH: usize = 128;
/// Size in bytes of a raw sha512 digest.
const EXPECTED_SHA512_BYTES: usize = 64;
/// Length of 64 bytes encoded as padded base64 (`4 * ceil(64 / 3)`); longer
/// payloads are rejected before any decoding is attempted.
const MAX_SHA512_BASE64_LENGTH: usize = 88;
impl FindHashArgs {
    /// Execute `find-hash`.
    ///
    /// The hash argument is validated first, so a malformed hash fails before
    /// the configuration is resolved or the store index is opened. Every row
    /// of the index is then scanned; each row containing the digest is
    /// printed as `name@version index_key`.
    ///
    /// # Errors
    ///
    /// Fails when the hash is malformed, the store index cannot be opened or
    /// read, a row cannot be decoded, or no row contains the digest
    /// ([`FindHashError::InvalidFileHash`]).
    pub fn run<'a>(
        self,
        config: impl FnOnce() -> miette::Result<&'a Config>,
    ) -> miette::Result<()> {
        let wanted = parse_hash(self.hash)?;
        let config = config()?;
        let store_dir = &config.store_dir;

        let opened = if config.frozen_store {
            StoreIndex::open_immutable(store_dir.root())
                .into_diagnostic()
                .wrap_err("Failed to open store index (frozen)")
        } else {
            StoreIndex::open_readonly_in(store_dir)
                .into_diagnostic()
                .wrap_err("Failed to open store index")
        };
        let store_index = opened?;

        let mut matches: Vec<(String, String, String)> = Vec::new();
        store_index.for_each_raw(|index_key, bytes| -> Result<(), FindHashError> {
            // Cheap projection first: only the digests are decoded here.
            let projected = match decode_find_hash_index(&bytes) {
                Ok(projected) => projected,
                Err(source) => {
                    return Err(FindHashError::CorruptStoreIndexRow { key: index_key, source });
                }
            };
            if contains_hash(&projected, &wanted) {
                // Only matching rows pay for the full decode that yields the manifest.
                match package_identity(&bytes) {
                    Ok((name, version)) => matches.push((name, version, index_key)),
                    Err(source) => {
                        return Err(FindHashError::CorruptStoreIndexRow {
                            key: index_key,
                            source,
                        });
                    }
                }
            }
            Ok(())
        })?;

        if matches.is_empty() {
            return Err(FindHashError::InvalidFileHash.into());
        }
        for (name, version, index_key) in &matches {
            println!("{}@{} {}", package_info(name), package_info(version), index_path(index_key));
        }
        Ok(())
    }
}
/// Normalize the user-supplied hash into a lowercase hex sha512 digest.
///
/// Two spellings are accepted:
///
/// * `sha512-<base64>`: the algorithm name is matched case-insensitively and
///   the payload may be padded or unpadded standard base64. It must decode to
///   exactly [`EXPECTED_SHA512_BYTES`] bytes.
/// * a bare hex string of exactly [`EXPECTED_HEX_LENGTH`] characters, in any
///   letter case.
///
/// Any input containing `-` is treated as the first spelling.
///
/// # Errors
///
/// Returns an error for an unsupported algorithm, an over-long or undecodable
/// base64 payload, a payload of the wrong decoded size, non-hex characters,
/// or a hex string of the wrong length.
fn parse_hash(mut hash: String) -> miette::Result<String> {
    use base64::{
        Engine as _,
        engine::general_purpose::{STANDARD, STANDARD_NO_PAD},
    };
    use std::fmt::Write as _;

    // A single `split_once` both detects and splits the `algo-payload` form.
    if let Some((algo, base64_part)) = hash.split_once('-') {
        if !algo.eq_ignore_ascii_case("sha512") {
            return Err(miette::miette!(
                "Unsupported hash algorithm \"{algo}\". Only \"sha512\" is supported."
            ));
        }
        // Reject absurdly long payloads before spending time decoding them.
        if base64_part.len() > MAX_SHA512_BASE64_LENGTH {
            return Err(miette::miette!(
                "Invalid hash format: sha512 base64 payload has {} character(s), expected at most {MAX_SHA512_BASE64_LENGTH}.",
                base64_part.len(),
            ));
        }
        // Try padded base64 first, then fall back to the unpadded alphabet.
        let decoded = STANDARD
            .decode(base64_part)
            .or_else(|_| STANDARD_NO_PAD.decode(base64_part))
            .into_diagnostic()
            .wrap_err("Failed to decode base64 hash")?;
        if decoded.len() != EXPECTED_SHA512_BYTES {
            return Err(miette::miette!(
                "Decoded hash is {} bytes, expected {EXPECTED_SHA512_BYTES} bytes for sha512.",
                decoded.len(),
            ));
        }
        let mut hex = String::with_capacity(decoded.len() * 2);
        for byte in decoded {
            write!(&mut hex, "{byte:02x}").into_diagnostic()?;
        }
        return Ok(hex);
    }

    if !hash.chars().all(|c| c.is_ascii_hexdigit()) {
        return Err(miette::miette!(
            "Invalid hash format: \"{hash}\" contains non-hexadecimal characters. \
            Expected a 128-character hex string or a sha512-base64 format."
        ));
    }
    if hash.len() != EXPECTED_HEX_LENGTH {
        return Err(miette::miette!(
            "Invalid hash format: \"{hash}\" has {} character(s), expected {EXPECTED_HEX_LENGTH}.",
            hash.len(),
        ));
    }
    // Hex digits are ASCII, so lowercasing in place is lossless.
    hash.make_ascii_lowercase();
    Ok(hash)
}
/// Reduced view of a package index row holding only what the hash search
/// reads: the algorithm name and the per-file digests.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct FindHashPackageIndex {
// Hash algorithm of the row; only "sha512" rows are searched.
algo: String,
// File entries of the package, keyed by string (presumably the file's relative path — TODO confirm).
files: HashMap<String, FindHashFileInfo>,
// Optional side-effects diffs (serialized as `sideEffects`), keyed by string.
side_effects: Option<HashMap<String, FindHashSideEffectsDiff>>,
}
/// Per-file entry reduced to the single field the search compares.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct FindHashFileInfo {
// Compared verbatim against the normalized (lowercase hex) search hash.
digest: String,
}
/// Side-effects diff reduced to its `added` files, the only part searched.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct FindHashSideEffectsDiff {
// Files recorded as added by the side effect, if any.
added: Option<HashMap<String, FindHashFileInfo>>,
}
/// Decode a raw index row into the reduced [`FindHashPackageIndex`] view.
///
/// The row is first transcoded to plain MessagePack and then deserialized.
/// A failure in either step is reported as the matching
/// [`StoreIndexError`] variant (`Transcode` or `Decode`).
fn decode_find_hash_index(bytes: &[u8]) -> Result<FindHashPackageIndex, StoreIndexError> {
    match transcode_to_plain_msgpack(bytes) {
        Ok(plain) => {
            rmp_serde::from_slice(&plain).map_err(|source| StoreIndexError::Decode { source })
        }
        Err(source) => Err(StoreIndexError::Transcode { source }),
    }
}
/// Report whether `hash` is the digest of any file recorded in `data`.
///
/// Rows whose algorithm is not `sha512` never match. Otherwise the regular
/// files are checked first, followed by the `added` files of every
/// side-effects diff.
fn contains_hash(data: &FindHashPackageIndex, hash: &str) -> bool {
    if data.algo != "sha512" {
        return false;
    }
    let regular_files = data.files.values();
    let side_effect_files = data
        .side_effects
        .iter()
        .flat_map(|side_effects| side_effects.values())
        .filter_map(|diff| diff.added.as_ref())
        .flat_map(|added| added.values());
    regular_files.chain(side_effect_files).any(|file| file.digest == hash)
}
/// Fully decode a raw index row and return the package's `(name, version)`.
///
/// Each value is taken from the row's manifest. When the manifest is absent,
/// or the field is missing or not a string, the literal `"unknown"` is used
/// instead.
///
/// # Errors
///
/// Propagates the error of `decode_package_files_index` when the row cannot
/// be decoded.
fn package_identity(bytes: &[u8]) -> Result<(String, String), StoreIndexError> {
    let data = decode_package_files_index(bytes)?;
    let manifest_field = |field: &str| -> String {
        data.manifest
            .as_ref()
            .and_then(|manifest| manifest.get(field))
            .and_then(|value| value.as_str())
            .map_or_else(|| "unknown".to_string(), |text| text.to_string())
    };
    Ok((manifest_field("name"), manifest_field("version")))
}
/// Render a package name or version in bright green, matching pnpm's
/// `PACKAGE_INFO_CLR = chalk.greenBright`. The text is sanitized first, and
/// ANSI codes are only emitted when stdout supports color (mirroring how
/// `chalk` stays plain when stdout is not a TTY).
fn package_info(text: &str) -> String {
    let cleaned = sanitize(text);
    let styled = cleaned.as_ref().if_supports_color(Stream::Stdout, |t| t.bright_green());
    styled.to_string()
}
/// Render an index key in pnpm's `INDEX_PATH_CLR = chalk.hex('#078487')`,
/// i.e. `rgb(7, 132, 135)`. The text is sanitized first; TTY handling is the
/// same as in [`package_info`].
fn index_path(text: &str) -> String {
    let cleaned = sanitize(text);
    let styled =
        cleaned.as_ref().if_supports_color(Stream::Stdout, |t| t.color(Rgb(7, 132, 135)));
    styled.to_string()
}

View File

@@ -0,0 +1,18 @@
use std::borrow::Cow;
/// Strip control characters from store-derived text before it reaches the
/// terminal, keeping `\n` and `\t`. Prevents stored metadata from emitting
/// raw escape sequences to the user's terminal.
///
/// "Control character" means anything [`char::is_control`] accepts: the C0
/// range, DEL (U+007F) and the C1 range (U+0080–U+009F, which includes the
/// 8-bit CSI U+009B). The same predicate decides both whether a copy is
/// needed and what gets removed, so DEL/C1-only input is stripped too.
///
/// Returns the input borrowed when nothing has to be removed, and an owned
/// filtered copy otherwise.
pub fn sanitize(text: &str) -> Cow<'_, str> {
    /// True for every control character except the two that are kept.
    fn is_stripped(character: char) -> bool {
        character.is_control() && character != '\n' && character != '\t'
    }

    if text.chars().any(is_stripped) {
        Cow::Owned(text.chars().filter(|character| !is_stripped(*character)).collect())
    } else {
        Cow::Borrowed(text)
    }
}

View File

@@ -6,7 +6,7 @@
//! [`buildDependentsTree`](https://github.com/pnpm/pnpm/blob/deps/inspection/tree-builder/src/buildDependentsTree.ts).
//!
use crate::State;
use crate::{State, cli_args::sanitize::sanitize};
use clap::Args;
use owo_colors::{OwoColorize, Stream};
use pacquet_config::matcher::{Matcher, create_matcher};
@@ -488,19 +488,5 @@ fn dim(text: &str) -> String {
cleaned.as_ref().if_supports_color(Stream::Stdout, |t| t.dimmed()).to_string()
}
/// Strip control characters (as defined by [`char::is_control`]: C0, DEL and
/// C1) from `text`, keeping `\n` and `\t`. One predicate decides both whether
/// a copy is needed and what is removed, so input whose only control
/// characters are DEL or C1 is stripped as well instead of being returned
/// untouched. Returns the input borrowed when nothing has to be removed.
fn sanitize(text: &str) -> std::borrow::Cow<'_, str> {
    /// True for every control character except the two that are kept.
    fn is_stripped(character: char) -> bool {
        character.is_control() && character != '\n' && character != '\t'
    }

    if text.chars().any(is_stripped) {
        std::borrow::Cow::Owned(
            text.chars().filter(|character| !is_stripped(*character)).collect(),
        )
    } else {
        std::borrow::Cow::Borrowed(text)
    }
}
#[cfg(test)]
mod tests;

View File

@@ -0,0 +1,119 @@
use assert_cmd::prelude::*;
use pacquet_store_dir::store_index::StoreIndex;
use pacquet_testing_utils::bin::CommandTempCwd;
/// Pick a `(file digest, package name, package version)` triple from the
/// first index entry whose manifest has string `name` and `version` fields
/// and which lists at least one file.
///
/// Panics when the index is empty or no entry qualifies.
fn find_hash_fixture(store_index: &StoreIndex) -> (String, String, String) {
    let keys = store_index.keys().unwrap();
    assert!(!keys.is_empty(), "Store index should have at least one key");
    store_index
        .get_many(&keys)
        .unwrap()
        .into_iter()
        .find_map(|(_key, data)| {
            let manifest = data.manifest.as_ref()?;
            let name = manifest.get("name").and_then(|value| value.as_str())?;
            let version = manifest.get("version").and_then(|value| value.as_str())?;
            let file = data.files.values().next()?;
            Some((file.digest.clone(), name.to_string(), version.to_string()))
        })
        .unwrap_or_else(|| {
            panic!("Should find a package hash with a non-empty name@version in the store index")
        })
}
/// A hex digest taken from an installed package must be reported together
/// with that package's name and version.
#[test]
fn find_hash_works() {
    let CommandTempCwd { mut pacquet, workspace, root: _root, npmrc_info, .. } =
        CommandTempCwd::init().add_mocked_registry();

    // Installing a package is what populates the store index.
    pacquet.arg("add").arg("is-odd@3.0.1").assert().success();

    let store_dir = pacquet_store_dir::StoreDir::from(npmrc_info.store_dir);
    let store_index = StoreIndex::open_readonly_in(&store_dir).unwrap();
    let (valid_hash, expected_name, expected_version) = find_hash_fixture(&store_index);

    // A fresh command, so the arguments of `add` do not carry over.
    let mut find_hash = std::process::Command::cargo_bin("pacquet").unwrap();
    find_hash.current_dir(&workspace);
    find_hash.arg("find-hash").arg(&valid_hash);
    let assertion = find_hash.assert().success();

    let stdout = String::from_utf8_lossy(&assertion.get_output().stdout);
    println!("STDOUT: {stdout}");

    assert!(stdout.contains(&expected_name), "Expected stdout to contain name {expected_name}");
    assert!(
        stdout.contains(&expected_version),
        "Expected stdout to contain version {expected_version}",
    );
}
/// A well-formed hash that matches no stored file must fail with
/// `ERR_PNPM_INVALID_FILE_HASH`.
#[test]
fn should_fail_on_missing_hash() {
    let CommandTempCwd { mut pacquet, workspace, root: _root, .. } =
        CommandTempCwd::init().add_mocked_registry();

    // The store index only exists once something has been installed.
    pacquet.arg("add").arg("is-odd@3.0.1").assert().success();

    // Valid-length hex that no file matches.
    let unmatched_hash = "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff";

    // A fresh command, so the arguments of `add` do not carry over.
    let mut find_hash = std::process::Command::cargo_bin("pacquet").unwrap();
    find_hash.current_dir(&workspace);
    find_hash.arg("find-hash").arg(unmatched_hash);
    let assertion = find_hash.assert().failure();

    let stderr = String::from_utf8_lossy(&assertion.get_output().stderr);
    assert!(stderr.contains("ERR_PNPM_INVALID_FILE_HASH"));
}
/// A `sha512-` payload that is not valid base64 must be rejected with the
/// base64 decoding error.
#[test]
fn should_fail_on_invalid_base64() {
    let CommandTempCwd { mut pacquet, root: _root, .. } =
        CommandTempCwd::init().add_mocked_registry();

    pacquet.arg("find-hash").arg("sha512-InvalidBase64!!!");
    let assertion = pacquet.assert().failure();

    let stderr = String::from_utf8_lossy(&assertion.get_output().stderr);
    assert!(stderr.contains("Failed to decode base64 hash"));
}
/// A base64 payload longer than the allowed maximum must be rejected with a
/// message that reports the payload length.
#[test]
fn should_fail_on_oversized_base64() {
    let CommandTempCwd { mut pacquet, root: _root, .. } =
        CommandTempCwd::init().add_mocked_registry();

    let oversized = format!("sha512-{}", "A".repeat(1_000));
    pacquet.arg("find-hash").arg(oversized);
    let assertion = pacquet.assert().failure();

    let stderr = String::from_utf8_lossy(&assertion.get_output().stderr);
    assert!(stderr.contains("sha512 base64 payload has 1000 character(s)"));
}
/// The `sha512-<base64>` spelling of a stored digest must find the same
/// package as its hex spelling.
#[test]
fn find_hash_works_with_base64() {
    use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};

    let CommandTempCwd { mut pacquet, workspace, root: _root, npmrc_info, .. } =
        CommandTempCwd::init().add_mocked_registry();
    pacquet.arg("add").arg("is-odd@3.0.1").assert().success();

    let store_dir = pacquet_store_dir::StoreDir::from(npmrc_info.store_dir);
    let store_index = StoreIndex::open_readonly_in(&store_dir).unwrap();
    let (hex_hash, expected_name, expected_version) = find_hash_fixture(&store_index);

    // Re-encode the hex digest as raw bytes, two hex digits per byte.
    let mut raw_digest: Vec<u8> = Vec::new();
    for start in (0..hex_hash.len()).step_by(2) {
        let pair = &hex_hash[start..start + 2];
        raw_digest.push(u8::from_str_radix(pair, 16).unwrap());
    }
    let base64_hash = format!("sha512-{}", BASE64.encode(&raw_digest));

    let mut find_hash = std::process::Command::cargo_bin("pacquet").unwrap();
    find_hash.current_dir(&workspace);
    find_hash.arg("find-hash").arg(&base64_hash);
    let assertion = find_hash.assert().success();

    let stdout = String::from_utf8_lossy(&assertion.get_output().stdout);
    println!("STDOUT: {stdout}");

    assert!(stdout.contains(&expected_name), "Expected stdout to contain name {expected_name}");
    assert!(
        stdout.contains(&expected_version),
        "Expected stdout to contain version {expected_version}",
    );
}

View File

@@ -5,7 +5,7 @@ mod msgpackr_records;
mod project_registry;
mod prune;
mod store_dir;
mod store_index;
pub mod store_index;
mod upload;
pub use add_files_from_dir::*;

View File

@@ -649,6 +649,31 @@ impl StoreIndex {
Ok(out)
}
/// Stream every raw `package_index` row to `visit` without first
/// collecting the full key set. This mirrors pnpm's
/// `StoreIndex.entries()` shape while leaving decode policy to the caller.
///
/// `visit` receives each row's key and undecoded data. The first error it
/// returns stops the iteration and is handed back unchanged.
///
/// # Errors
///
/// Preparing the statement, running the query, or reading a row can fail;
/// those failures are reported as [`StoreIndexError::Read`] converted
/// into `VisitError`.
pub fn for_each_raw<VisitError>(
    &self,
    mut visit: impl FnMut(String, Vec<u8>) -> Result<(), VisitError>,
) -> Result<(), VisitError>
where
    VisitError: From<StoreIndexError>,
{
    // Every database failure maps to the same variant; it captures nothing,
    // so the closure is `Copy` and can be reused for each `map_err`.
    let read_error = |source| VisitError::from(StoreIndexError::Read { source });

    let mut statement =
        self.conn.prepare("SELECT key, data FROM package_index").map_err(read_error)?;
    let rows = statement
        .query_map([], |row| {
            let key = row.get::<_, String>(0)?;
            let data = row.get::<_, Vec<u8>>(1)?;
            Ok((key, data))
        })
        .map_err(read_error)?;

    for row in rows {
        let (key, data) = row.map_err(read_error)?;
        visit(key, data)?;
    }
    Ok(())
}
/// Batched existence probe: the subset of `keys` that have a row in
/// `package_index`. Same chunked `WHERE key IN` shape (and SQL-injection
/// posture) as [`Self::get_many_raw`], but selects only the key column,

View File

@@ -1,6 +1,6 @@
use super::{
CafsFileInfo, GET_MANY_CHUNK, PackageFilesIndex, StoreIndex, git_hosted_store_index_key,
immutable_sqlite_uri, pick_store_index_key, store_index_key,
CafsFileInfo, GET_MANY_CHUNK, PackageFilesIndex, StoreIndex, StoreIndexError,
git_hosted_store_index_key, immutable_sqlite_uri, pick_store_index_key, store_index_key,
};
use crate::StoreDir;
use pretty_assertions::assert_eq;
@@ -254,6 +254,30 @@ fn get_many_all_hit_returns_every_row() {
}
}
/// `for_each_raw` must hand every stored row, with non-empty data, to the
/// visitor exactly once; visiting order is not asserted.
#[test]
fn for_each_raw_visits_every_row() {
    let dir = tempdir().unwrap();
    let idx = StoreIndex::open(dir.path()).unwrap();
    let payload = sample_index();

    let mut keys: Vec<String> = Vec::with_capacity(3);
    for index in 0..3 {
        keys.push(store_index_key("sha512-x", &format!("pkg{index}@1.0.0")));
    }
    for key in &keys {
        idx.set(key, &payload).unwrap();
    }

    let mut visited = Vec::new();
    let outcome: Result<(), StoreIndexError> = idx.for_each_raw(|key, data| {
        assert!(!data.is_empty());
        visited.push(key);
        Ok(())
    });
    outcome.unwrap();

    // Compare as sorted lists so the check does not depend on row order.
    keys.sort();
    visited.sort();
    assert_eq!(visited, keys);
}
#[test]
fn get_many_mixed_hit_and_miss_returns_only_hits() {
let dir = tempdir().unwrap();