From 35d5440ce1532ccfe27cfd310a1be750fa9b719e Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Wed, 20 May 2026 23:07:16 +0200 Subject: [PATCH] feat(pacquet): port resolving/git-resolver and wire it into the install chain (#11779) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(pacquet): port resolving/git-resolver and wire it into the install chain Adds `pacquet-resolving-git-resolver`, the Rust port of pnpm's `@pnpm/resolving.git-resolver`. Recognises GitHub / GitLab / Bitbucket shortcut forms and full `git+ssh:` / `git+https:` / `ssh:` / plain `https://…/repo.git` URLs, runs `git ls-remote` to pin the commit (partial commit search, annotated-tag dereference, semver-range matching), and emits either a git-hosted tarball resolution or a `Git{repo,commit}` resolution. Production runners shell out to the system `git` binary via `tokio::task::spawn_blocking` and use the install-wide `ThrottledClient` for the HEAD probe. Widens the resolver-base contract so URL-shaped IDs fit: adds a `PkgResolutionId` newtype (rule-3 branded string, infallible `From<String>`/`From<&str>`/`From<&PkgNameVer>`), changes `ResolveResult.id` to that type, and adds `name_ver: Option<PkgNameVer>` so callers that need the structured `name@version` form keep working. npm-resolver fills both fields; git-resolver leaves `name_ver` `None` (the install path that consumes git resolutions hasn't landed yet, so those call sites panic with a TODO message until then). `DefaultResolver` now implements `Resolver` too (returns `Ok(None)` when no resolver in the chain claims), letting `resolve_dependency_tree` accept the chain directly. The install-side wiring in `install_without_lockfile.rs` constructs `DefaultResolver::new(vec![Box::new(npm_resolver), Box::new(git_resolver)])` with `RealGitProbe` / `RealGitRunner`, mirroring upstream's `createResolver` chain order. 
Test coverage: 51 unit tests in the new crate, including the full SCP-style URL repair matrix ported from `parsePref.test.ts` and the GitLab `/-/archive/` tarball regression for pnpm #11533. Full workspace `cargo nextest run` is green at 1635 tests. --- Written by an agent (Claude Code, claude-opus-4-7). * fix(pacquet): satisfy dylint perfectionist + rustdoc on git-resolver port * Reorder `#[derive(...)]` on `PkgResolutionId` to match the `prefix_then_alphabetical` rule the dylint Perfectionist lint enforces (`From` last after `Serialize`/`Deserialize`). * Add `()` to function intra-doc links that collide with same-named modules (`create_git_hosted_pkg_id`, `parse_bare_specifier`) so rustdoc's `broken-intra-doc-links` lint stops treating them as ambiguous. --- Written by an agent (Claude Code, claude-opus-4-7). * fix(pacquet): satisfy Perfectionist dylint lints on git-resolver port CI's `just ready` doesn't surface Perfectionist (it runs only as a dedicated dylint job on a nightly toolchain). Fixes: * Rename single-letter generics `P`/`R` → `Probe`/`Runner` on `GitResolver`, `PartialSpec::finalize`, `from_hosted_git`, and `resolve_ref`. * Rename single-letter closure / function / let-binding params (`s`/`h`/`c`/`p`/`i`/`g`/...) to descriptive names. * Replace Unicode ellipsis (`…`, U+2026) with ASCII `...` in comments. * Add trailing commas to multi-line `assert_eq!` / `assert!` invocations, and remove the stray trailing comma on a single-line one. Also fix follow-on JSR-resolver test cases that still read `result.id.{name,suffix}`: switch them to `result.name_ver.as_ref()...` to match the post-widening `ResolveResult` shape. --- Written by an agent (Claude Code, claude-opus-4-7). 
* fix(pacquet): address PR review on git-resolver port * Replace the two `.expect()` calls on `ResolveResult.name_ver` in the install path with `.ok_or_else()` that surfaces a typed error: `InstallPackageFromRegistryError::UnsupportedResolution` and a new `InstallWithoutLockfileError::UnsupportedInstallResolution`. Now that the git resolver is in the chain, a git/tarball/local resolution reaching the without-lockfile install path returns an error end-to-end instead of panicking. Add a regression test pinning the contract. * Make `percent_decode` (in `hosted_git.rs`) and `percent_decode_str` (in `parse_bare_specifier.rs`) UTF-8 aware: collect decoded bytes into a `Vec<u8>` and reassemble via `String::from_utf8`, falling back to the original input on malformed UTF-8 (matches Node's `decodeURIComponent` throwing a `URIError` that upstream's `try/catch` swallows). The byte→`char` cast was corrupting any multi-byte sequence (e.g., `%E2%80%A6` → ellipsis); regression test added. --- Written by an agent (Claude Code, claude-opus-4-7). * chore(pacquet): drop unused UnsupportedInstallResolution after rebase Main's `feat(pacquet): peer-dependency resolution stage` reworked `install_without_lockfile.rs` to derive the virtual-store name from the resolved depPath via `pacquet_deps_path::dep_path_to_filename` instead of reading `result.name_ver`. That removed the `.expect()` / `.ok_or_else()` site this error variant was added for; with no remaining callers, drop the dead variant. --- Written by an agent (Claude Code, claude-opus-4-7). 
--- Cargo.lock | 19 + Cargo.toml | 1 + pacquet/crates/package-manager/Cargo.toml | 54 +- .../src/install_package_from_registry.rs | 13 +- .../install_package_from_registry/tests.rs | 76 +- .../src/install_without_lockfile.rs | 18 +- .../resolving-default-resolver/src/lib.rs | 37 +- .../resolving-default-resolver/src/tests.rs | 6 +- .../src/resolve_dependency_tree.rs | 8 +- .../src/resolve_peers.rs | 37 +- .../resolving-deps-resolver/src/tests.rs | 5 +- .../crates/resolving-git-resolver/Cargo.toml | 30 + .../src/create_git_hosted_pkg_id.rs | 94 ++ .../src/git_resolver.rs | 318 +++++++ .../resolving-git-resolver/src/hosted_git.rs | 873 ++++++++++++++++++ .../crates/resolving-git-resolver/src/lib.rs | 50 + .../src/parse_bare_specifier.rs | 594 ++++++++++++ .../resolving-git-resolver/src/resolve_ref.rs | 419 +++++++++ .../resolving-git-resolver/src/runners.rs | 148 +++ .../src/npm_resolver.rs | 4 +- .../src/npm_resolver/tests.rs | 18 +- .../crates/resolving-resolver-base/Cargo.toml | 7 +- .../crates/resolving-resolver-base/src/lib.rs | 8 +- .../resolving-resolver-base/src/resolve.rs | 61 +- .../resolving-resolver-base/src/tests.rs | 5 +- 25 files changed, 2830 insertions(+), 73 deletions(-) create mode 100644 pacquet/crates/resolving-git-resolver/Cargo.toml create mode 100644 pacquet/crates/resolving-git-resolver/src/create_git_hosted_pkg_id.rs create mode 100644 pacquet/crates/resolving-git-resolver/src/git_resolver.rs create mode 100644 pacquet/crates/resolving-git-resolver/src/hosted_git.rs create mode 100644 pacquet/crates/resolving-git-resolver/src/lib.rs create mode 100644 pacquet/crates/resolving-git-resolver/src/parse_bare_specifier.rs create mode 100644 pacquet/crates/resolving-git-resolver/src/resolve_ref.rs create mode 100644 pacquet/crates/resolving-git-resolver/src/runners.rs diff --git a/Cargo.lock b/Cargo.lock index 2e1e0cfea2..0ee6bc7b22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2303,7 +2303,9 @@ dependencies = [ "pacquet-registry", 
"pacquet-registry-mock", "pacquet-reporter", + "pacquet-resolving-default-resolver", "pacquet-resolving-deps-resolver", + "pacquet-resolving-git-resolver", "pacquet-resolving-npm-resolver", "pacquet-resolving-resolver-base", "pacquet-store-dir", @@ -2447,6 +2449,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "pacquet-resolving-git-resolver" +version = "0.0.1" +dependencies = [ + "derive_more", + "miette 7.6.0", + "node-semver", + "pacquet-lockfile", + "pacquet-network", + "pacquet-resolving-resolver-base", + "pretty_assertions", + "reqwest", + "tokio", + "tracing", +] + [[package]] name = "pacquet-resolving-jsr-specifier-parser" version = "0.0.1" @@ -2493,6 +2511,7 @@ name = "pacquet-resolving-resolver-base" version = "0.0.1" dependencies = [ "chrono", + "derive_more", "pacquet-config", "pacquet-lockfile", "serde", diff --git a/Cargo.toml b/Cargo.toml index daa93c2aa7..316063983e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,7 @@ pacquet-patching = { path = "pacquet/crates/patching" } pacquet-real-hoist = { path = "pacquet/crates/real-hoist" } pacquet-resolving-default-resolver = { path = "pacquet/crates/resolving-default-resolver" } pacquet-resolving-deps-resolver = { path = "pacquet/crates/resolving-deps-resolver" } +pacquet-resolving-git-resolver = { path = "pacquet/crates/resolving-git-resolver" } pacquet-resolving-jsr-specifier-parser = { path = "pacquet/crates/resolving-jsr-specifier-parser" } pacquet-resolving-npm-resolver = { path = "pacquet/crates/resolving-npm-resolver" } pacquet-resolving-parse-wanted-dependency = { path = "pacquet/crates/resolving-parse-wanted-dependency" } diff --git a/pacquet/crates/package-manager/Cargo.toml b/pacquet/crates/package-manager/Cargo.toml index 9933a5f2db..8d79fe21d7 100644 --- a/pacquet/crates/package-manager/Cargo.toml +++ b/pacquet/crates/package-manager/Cargo.toml @@ -11,32 +11,34 @@ license.workspace = true repository.workspace = true [dependencies] -pacquet-cmd-shim = { workspace = true } 
-pacquet-crypto-hash = { workspace = true } -pacquet-directory-fetcher = { workspace = true } -pacquet-executor = { workspace = true } -pacquet-fs = { workspace = true } -pacquet-git-fetcher = { workspace = true } -pacquet-lockfile = { workspace = true } -pacquet-lockfile-verification = { workspace = true } -pacquet-modules-yaml = { workspace = true } -pacquet-network = { workspace = true } -pacquet-config = { workspace = true } -pacquet-graph-hasher = { workspace = true } -pacquet-package-manifest = { workspace = true } -pacquet-package-is-installable = { workspace = true } -pacquet-patching = { workspace = true } -pacquet-real-hoist = { workspace = true } -pacquet-registry = { workspace = true } -pacquet-reporter = { workspace = true } -pacquet-deps-path = { workspace = true } -pacquet-resolving-deps-resolver = { workspace = true } -pacquet-resolving-npm-resolver = { workspace = true } -pacquet-resolving-resolver-base = { workspace = true } -pacquet-store-dir = { workspace = true } -pacquet-tarball = { workspace = true } -pacquet-workspace = { workspace = true } -pacquet-workspace-state = { workspace = true } +pacquet-cmd-shim = { workspace = true } +pacquet-crypto-hash = { workspace = true } +pacquet-deps-path = { workspace = true } +pacquet-directory-fetcher = { workspace = true } +pacquet-executor = { workspace = true } +pacquet-fs = { workspace = true } +pacquet-git-fetcher = { workspace = true } +pacquet-lockfile = { workspace = true } +pacquet-lockfile-verification = { workspace = true } +pacquet-modules-yaml = { workspace = true } +pacquet-network = { workspace = true } +pacquet-config = { workspace = true } +pacquet-graph-hasher = { workspace = true } +pacquet-package-manifest = { workspace = true } +pacquet-package-is-installable = { workspace = true } +pacquet-patching = { workspace = true } +pacquet-real-hoist = { workspace = true } +pacquet-registry = { workspace = true } +pacquet-reporter = { workspace = true } +pacquet-resolving-default-resolver = { 
workspace = true } +pacquet-resolving-deps-resolver = { workspace = true } +pacquet-resolving-git-resolver = { workspace = true } +pacquet-resolving-npm-resolver = { workspace = true } +pacquet-resolving-resolver-base = { workspace = true } +pacquet-store-dir = { workspace = true } +pacquet-tarball = { workspace = true } +pacquet-workspace = { workspace = true } +pacquet-workspace-state = { workspace = true } async-recursion = { workspace = true } chrono = { workspace = true } diff --git a/pacquet/crates/package-manager/src/install_package_from_registry.rs b/pacquet/crates/package-manager/src/install_package_from_registry.rs index 40348f2ef2..5ddfe62244 100644 --- a/pacquet/crates/package-manager/src/install_package_from_registry.rs +++ b/pacquet/crates/package-manager/src/install_package_from_registry.rs @@ -104,8 +104,17 @@ impl<'a> InstallPackageFromRegistry<'a> { first_visit, } = self; - let real_name = resolution.id.name.to_string(); - let version = resolution.id.suffix.to_string(); + let name_ver = resolution.name_ver.as_ref().ok_or_else(|| { + InstallPackageFromRegistryError::UnsupportedResolution { + detail: format!( + "resolver {resolved_via} produced a resolution without a structured \ + name@version; the npm install path needs both (alias={alias})", + resolved_via = resolution.resolved_via, + ), + } + })?; + let real_name = name_ver.name.to_string(); + let version = name_ver.suffix.to_string(); let virtual_store_name = shorten_virtual_store_name( format!("{}@{}", real_name.replace('/', "+"), version), config.virtual_store_dir_max_length as usize, diff --git a/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs b/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs index 06f03f2bb2..f3701d2add 100644 --- a/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs +++ b/pacquet/crates/package-manager/src/install_package_from_registry/tests.rs @@ -1,10 +1,11 @@ -use 
super::InstallPackageFromRegistry; +use super::{InstallPackageFromRegistry, InstallPackageFromRegistryError}; use pacquet_config::Config; +use pacquet_lockfile::{LockfileResolution, TarballResolution}; use pacquet_network::ThrottledClient; use pacquet_registry_mock::AutoMockInstance; use pacquet_reporter::{LogEvent, ProgressMessage, Reporter, SilentReporter}; use pacquet_resolving_npm_resolver::{InMemoryPackageMetaCache, NpmResolver}; -use pacquet_resolving_resolver_base::{ResolveOptions, Resolver, WantedDependency}; +use pacquet_resolving_resolver_base::{ResolveOptions, ResolveResult, Resolver, WantedDependency}; use pacquet_store_dir::{SharedVerifiedFilesCache, StoreDir}; use pipe_trait::Pipe; use pretty_assertions::assert_eq; @@ -152,8 +153,9 @@ pub async fn should_install_package_from_pre_resolved_result() { .await .unwrap(); - let real_name = resolution.id.name.to_string(); - let virtual_store_name = format!("{}@{}", real_name.replace('/', "+"), resolution.id.suffix); + let name_ver = resolution.name_ver.as_ref().expect("npm resolver fills name_ver"); + let real_name = name_ver.name.to_string(); + let virtual_store_name = format!("{}@{}", real_name.replace('/', "+"), name_ver.suffix); let virtual_store_path = virtual_store_dir.path().join(virtual_store_name).join("node_modules").join(&real_name); assert!(virtual_store_path.is_dir()); @@ -382,3 +384,69 @@ async fn install_emits_progress_sequence() { drop((store_dir, modules_dir, virtual_store_dir, cache_dir, mock_instance)); } + +/// Regression test: a `ResolveResult` whose `name_ver` is `None` +/// (every non-npm resolver — git / tarball / local) must surface as +/// [`InstallPackageFromRegistryError::UnsupportedResolution`] rather +/// than panicking. Pins the install path's contract once the git +/// resolver is wired into the chain. 
+#[tokio::test] +async fn install_returns_unsupported_resolution_when_name_ver_missing() { + let store_dir = tempdir().unwrap(); + let modules_dir = tempdir().unwrap(); + let virtual_store_dir = tempdir().unwrap(); + + let config = create_config(store_dir.path(), modules_dir.path(), virtual_store_dir.path()); + let config: &'static Config = config.pipe(Box::new).pipe(Box::leak); + + let http_client = Arc::new(ThrottledClient::new_for_installs()); + let verified_files_cache = SharedVerifiedFilesCache::default(); + let logged_methods = AtomicU8::new(0); + + let resolution = ResolveResult { + id: "git+ssh://git@example.com/foo/bar.git#deadbeef".into(), + name_ver: None, + latest: None, + published_at: None, + manifest: None, + resolution: LockfileResolution::Tarball(TarballResolution { + tarball: "https://example.com/foo.tar.gz".to_string(), + integrity: None, + git_hosted: Some(true), + path: None, + }), + resolved_via: "git-repository".to_string(), + normalized_bare_specifier: Some("github:foo/bar#deadbeef".to_string()), + alias: Some("bar".to_string()), + policy_violation: None, + }; + + let result = InstallPackageFromRegistry { + tarball_mem_cache: &Default::default(), + config, + http_client: &http_client, + store_index: None, + store_index_writer: None, + verified_files_cache: &verified_files_cache, + logged_methods: &logged_methods, + requester: "", + alias: "bar", + resolution: &resolution, + node_modules_dir: modules_dir.path(), + first_visit: true, + } + .run::() + .await; + + match result { + Err(InstallPackageFromRegistryError::UnsupportedResolution { detail }) => { + assert!( + detail.contains("git-repository"), + "error should name the resolver tag: {detail}", + ); + } + other => panic!("expected UnsupportedResolution, got {other:?}"), + } + + drop((store_dir, modules_dir, virtual_store_dir)); +} diff --git a/pacquet/crates/package-manager/src/install_without_lockfile.rs b/pacquet/crates/package-manager/src/install_without_lockfile.rs index 
4d576fd989..b43e28b116 100644 --- a/pacquet/crates/package-manager/src/install_without_lockfile.rs +++ b/pacquet/crates/package-manager/src/install_without_lockfile.rs @@ -12,12 +12,14 @@ use pacquet_config::Config; use pacquet_network::ThrottledClient; use pacquet_package_manifest::{DependencyGroup, PackageManifest}; use pacquet_reporter::{LogEvent, LogLevel, Reporter, Stage, StageLog}; +use pacquet_resolving_default_resolver::DefaultResolver; use pacquet_resolving_deps_resolver::{ DepPath, DependenciesGraph, ResolveDependencyTreeError, ResolveDependencyTreeOptions, ResolvePeersOptions, resolve_dependency_tree, resolve_peers, }; +use pacquet_resolving_git_resolver::{GitResolver, RealGitProbe, RealGitRunner}; use pacquet_resolving_npm_resolver::{InMemoryPackageMetaCache, NpmResolver}; -use pacquet_resolving_resolver_base::ResolveOptions; +use pacquet_resolving_resolver_base::{ResolveOptions, Resolver}; use pacquet_store_dir::{SharedVerifiedFilesCache, StoreIndex, StoreIndexWriter}; use pacquet_tarball::MemCache; use pipe_trait::Pipe; @@ -171,6 +173,16 @@ impl<'a, DependencyGroupList> InstallWithoutLockfile<'a, DependencyGroupList> { prefer_offline: config.prefer_offline, ignore_missing_time_field: config.minimum_release_age_ignore_missing_time, }; + let git_resolver = GitResolver::new( + Arc::new(RealGitProbe::new(Arc::clone(&http_client_arc))), + Arc::new(RealGitRunner::new()), + ); + // Order mirrors upstream's chain at + // : + // npm before git. Local/tarball/workspace/runtimes will slot + // in as those crates land. + let resolver: Box = + Box::new(DefaultResolver::new(vec![Box::new(npm_resolver), Box::new(git_resolver)])); // Compile `minimumReleaseAge` (and its exclude pattern set) // for the resolve pass. 
Mirrors the verifier wiring in @@ -205,13 +217,13 @@ impl<'a, DependencyGroupList> InstallWithoutLockfile<'a, DependencyGroupList> { }, }; - let tree = resolve_dependency_tree(&npm_resolver, manifest, dependency_groups, tree_opts) + let tree = resolve_dependency_tree(&*resolver, manifest, dependency_groups, tree_opts) .await .map_err(InstallWithoutLockfileError::ResolveDependencyTree)?; // Drop the resolver (and its meta cache) before the install // pass: the tree captures every `ResolveResult` we need. - drop(npm_resolver); + drop(resolver); // Open the read-only SQLite index once per install, shared across // every `DownloadTarballToStore`. See the matching comment in diff --git a/pacquet/crates/resolving-default-resolver/src/lib.rs b/pacquet/crates/resolving-default-resolver/src/lib.rs index 08462894ad..f52ff6e530 100644 --- a/pacquet/crates/resolving-default-resolver/src/lib.rs +++ b/pacquet/crates/resolving-default-resolver/src/lib.rs @@ -17,8 +17,8 @@ use derive_more::{Display, Error}; use miette::Diagnostic; use pacquet_resolving_resolver_base::{ - LatestInfo, LatestQuery, ResolveError, ResolveOptions, ResolveResult, Resolver, - WantedDependency, + LatestInfo, LatestQuery, ResolveError, ResolveFuture, ResolveLatestFuture, ResolveOptions, + ResolveResult, Resolver, WantedDependency, }; /// Composed-chain analog of pnpm's @@ -78,6 +78,39 @@ impl DefaultResolver { } } +/// `DefaultResolver` doubles as a [`Resolver`] so callers can compose +/// it into another dispatcher (or hand it to a consumer that already +/// accepts the trait, like `resolve_dependency_tree`). Through the +/// trait, the "no resolver claimed" branch surfaces as `Ok(None)` so +/// the caller chooses how to react — the inherent +/// [`Self::resolve`](DefaultResolver::resolve) method keeps raising +/// [`SpecNotSupportedByAnyResolverError`] for callers that prefer the +/// error form. 
+impl Resolver for DefaultResolver { + fn resolve<'a>( + &'a self, + wanted_dependency: &'a WantedDependency, + opts: &'a ResolveOptions, + ) -> ResolveFuture<'a> { + Box::pin(async move { + for resolver in &self.chain { + if let Some(result) = resolver.resolve(wanted_dependency, opts).await? { + return Ok(Some(result)); + } + } + Ok(None) + }) + } + + fn resolve_latest<'a>( + &'a self, + query: &'a LatestQuery, + opts: &'a ResolveOptions, + ) -> ResolveLatestFuture<'a> { + Box::pin(self.resolve_latest(query, opts)) + } +} + /// The `SPEC_NOT_SUPPORTED_BY_ANY_RESOLVER` error code raised when /// every resolver in the chain returned `Ok(None)` for a wanted /// dependency. diff --git a/pacquet/crates/resolving-default-resolver/src/tests.rs b/pacquet/crates/resolving-default-resolver/src/tests.rs index 82790b3a3a..6b128c1ff4 100644 --- a/pacquet/crates/resolving-default-resolver/src/tests.rs +++ b/pacquet/crates/resolving-default-resolver/src/tests.rs @@ -15,7 +15,7 @@ fn fake_resolution() -> LockfileResolution { }) } -fn fake_id() -> PkgNameVer { +fn fake_name_ver() -> PkgNameVer { "lodash@4.17.21".parse().expect("parse fake PkgNameVer") } @@ -38,8 +38,10 @@ impl Resolver for PrefixResolver { if !bare.starts_with(self.prefix) { return Ok(None); } + let name_ver = fake_name_ver(); Ok(Some(ResolveResult { - id: fake_id(), + id: (&name_ver).into(), + name_ver: Some(name_ver), latest: None, published_at: None, manifest: None, diff --git a/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs b/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs index 4283d776f4..e19989570d 100644 --- a/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs +++ b/pacquet/crates/resolving-deps-resolver/src/resolve_dependency_tree.rs @@ -177,8 +177,12 @@ where return Ok(None); } - let alias = - result.alias.clone().or(wanted.alias.clone()).unwrap_or_else(|| result.id.name.to_string()); + let alias = result + .alias + .clone() + 
.or(wanted.alias.clone()) + .or_else(|| result.name_ver.as_ref().map(|nv| nv.name.to_string())) + .unwrap_or_else(|| id.clone()); // Build (or look up) the ResolvedPackage envelope. The first // visitor populates it; later visitors collapse onto it. diff --git a/pacquet/crates/resolving-deps-resolver/src/resolve_peers.rs b/pacquet/crates/resolving-deps-resolver/src/resolve_peers.rs index 69ab2c8518..19ab4bc4c8 100644 --- a/pacquet/crates/resolving-deps-resolver/src/resolve_peers.rs +++ b/pacquet/crates/resolving-deps-resolver/src/resolve_peers.rs @@ -43,6 +43,29 @@ use crate::{ node_id::NodeId, resolved_tree::{PeerDep, ResolvedPackage, ResolvedTree}, }; +use pacquet_resolving_resolver_base::ResolveResult; + +/// Pull `(name, version)` out of a `ResolveResult` the peer-resolution +/// stage can hash and compare on. +/// +/// The npm-registry resolver always fills [`ResolveResult::name_ver`], +/// so the fast path lifts it straight out. The git / tarball / local +/// resolvers leave it `None` (their canonical name lives in the +/// fetched manifest, which the resolver doesn't read at resolve +/// time); for those, fall back to `(alias, id-as-string)`. The peer +/// graph machinery only ever looks the name up in +/// [`ResolvedTree::all_peer_dep_names`] — a set that comes from +/// upstream's `parsePeerDependencies` over npm-shaped packages — so +/// the fallback's "name" will simply miss every lookup, naturally +/// short-circuiting peer propagation for non-npm packages without +/// panicking on `name_ver = None`. +fn pkg_name_version(result: &ResolveResult) -> (String, String) { + if let Some(name_ver) = result.name_ver.as_ref() { + return (name_ver.name.to_string(), name_ver.suffix.to_string()); + } + let fallback_name = result.alias.clone().unwrap_or_else(|| result.id.as_str().to_string()); + (fallback_name, result.id.as_str().to_string()) +} /// Options threaded into [`fn@resolve_peers`]. 
#[derive(Debug, Clone, Copy)] @@ -284,7 +307,7 @@ impl<'tree> Walker<'tree> { let tree_node = self.tree.dependencies_tree[&node_id].clone(); let pkg = self.tree.packages[&tree_node.resolved_package_id].clone(); - let pkg_name = pkg.result.id.name.to_string(); + let (pkg_name, _pkg_version) = pkg_name_version(&pkg.result); // Build the ParentRefs map that descendants of this node see: // parent's view + this node's own children, restricted to @@ -295,7 +318,7 @@ impl<'tree> Walker<'tree> { let Some(child_pkg) = self.tree.packages.get(&child_tree.resolved_package_id) else { continue; }; - let child_real_name = child_pkg.result.id.name.to_string(); + let (child_real_name, child_version) = pkg_name_version(&child_pkg.result); // Only peer-relevant aliases need to land in `parentRefs`. // Pnpm filters with `allPeerDepNames` to keep the propagated // map small. @@ -304,7 +327,6 @@ impl<'tree> Walker<'tree> { if !alias_relevant && !real_relevant { continue; } - let child_version = child_pkg.result.id.suffix.to_string(); let parent_ref = ParentRef { version: child_version, node_id: Some(*child_node_id), @@ -558,10 +580,8 @@ impl<'tree> Walker<'tree> { } let tree_node = &self.tree.dependencies_tree[&peer_node_id]; let pkg = &self.tree.packages[&tree_node.resolved_package_id]; - PeerId::Pair { - name: pkg.result.id.name.to_string(), - version: pkg.result.id.suffix.to_string(), - } + let (name, version) = pkg_name_version(&pkg.result); + PeerId::Pair { name, version } } } @@ -575,8 +595,7 @@ fn insert_parent_ref( pkg: &ResolvedPackage, tree: &ResolvedTree, ) { - let real_name = pkg.result.id.name.to_string(); - let version = pkg.result.id.suffix.to_string(); + let (real_name, version) = pkg_name_version(&pkg.result); let alias_relevant = tree.all_peer_dep_names.contains(&direct.alias); let real_relevant = tree.all_peer_dep_names.contains(&real_name); if !alias_relevant && !real_relevant { diff --git a/pacquet/crates/resolving-deps-resolver/src/tests.rs 
b/pacquet/crates/resolving-deps-resolver/src/tests.rs index e3e5d3b1a8..89bf776b2e 100644 --- a/pacquet/crates/resolving-deps-resolver/src/tests.rs +++ b/pacquet/crates/resolving-deps-resolver/src/tests.rs @@ -44,12 +44,13 @@ impl Resolver for StubResolver { fn fake_result(name: &str, version: &str, manifest: serde_json::Value) -> ResolveResult { use pacquet_lockfile::{LockfileResolution, PkgName, PkgNameVer, TarballResolution}; - let id = PkgNameVer::new( + let name_ver = PkgNameVer::new( PkgName::parse(name).unwrap(), node_semver::Version::from_str(version).unwrap(), ); ResolveResult { - id, + id: (&name_ver).into(), + name_ver: Some(name_ver), latest: Some(version.to_string()), published_at: None, manifest: Some(manifest), diff --git a/pacquet/crates/resolving-git-resolver/Cargo.toml b/pacquet/crates/resolving-git-resolver/Cargo.toml new file mode 100644 index 0000000000..b227450da1 --- /dev/null +++ b/pacquet/crates/resolving-git-resolver/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "pacquet-resolving-git-resolver" +version = "0.0.1" +publish = false +authors.workspace = true +description.workspace = true +edition.workspace = true +homepage.workspace = true +keywords.workspace = true +license.workspace = true +repository.workspace = true + +[dependencies] +pacquet-lockfile = { workspace = true } +pacquet-network = { workspace = true } +pacquet-resolving-resolver-base = { workspace = true } + +derive_more = { workspace = true } +miette = { workspace = true } +node-semver = { workspace = true } +reqwest = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } + +[dev-dependencies] +pretty_assertions = { workspace = true } +tokio = { workspace = true, features = ["macros", "rt"] } + +[lints] +workspace = true diff --git a/pacquet/crates/resolving-git-resolver/src/create_git_hosted_pkg_id.rs b/pacquet/crates/resolving-git-resolver/src/create_git_hosted_pkg_id.rs new file mode 100644 index 0000000000..7f9a3513eb --- /dev/null +++ 
b/pacquet/crates/resolving-git-resolver/src/create_git_hosted_pkg_id.rs @@ -0,0 +1,94 @@ +//! Ports pnpm's +//! [`createGitHostedPkgId.ts`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/createGitHostedPkgId.ts). + +/// Build the URL-shaped ID for a `Git` lockfile resolution. +/// +/// Mirrors upstream's [`createGitHostedPkgId`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/createGitHostedPkgId.ts#L3-L10): +/// +/// * Prefix `https://` when `repo` has no scheme. +/// * Prefix `git+` when the resulting string doesn't start with it. +/// * Append `#`. +/// * Append `&path:` when `path` is `Some`. +/// +/// The output is the `PkgResolutionId` upstream stamps as `id` on a git +/// `ResolveResult`. +pub fn create_git_hosted_pkg_id(repo: &str, commit: &str, path: Option<&str>) -> String { + let mut id = if repo.contains("://") { + format!("{repo}#{commit}") + } else { + format!("https://{repo}#{commit}") + }; + if !id.starts_with("git+") { + id.insert_str(0, "git+"); + } + if let Some(path) = path { + id.push_str("&path:"); + id.push_str(path); + } + id +} + +#[cfg(test)] +mod tests { + use super::create_git_hosted_pkg_id; + + #[test] + fn ssh_url() { + assert_eq!( + create_git_hosted_pkg_id( + "ssh://git@example.com/org/repo.git", + "cba04669e621b85fbdb33371604de1a2898e68e9", + None, + ), + "git+ssh://git@example.com/org/repo.git#cba04669e621b85fbdb33371604de1a2898e68e9", + ); + } + + #[test] + fn https_url_with_auth() { + assert_eq!( + create_git_hosted_pkg_id( + "https://0000000000000000000000000000000000000000:x-oauth-basic@github.com/foo/bar.git", + "0000000000000000000000000000000000000000", + None, + ), + "git+https://0000000000000000000000000000000000000000:x-oauth-basic@github.com/foo/bar.git#0000000000000000000000000000000000000000", + ); + } + + #[test] + fn file_url() { + assert_eq!( + create_git_hosted_pkg_id( + "file:///Users/zoltan/src/pnpm/pnpm/resolving/git-resolver", + 
"988c61e11dc8d9ca0b5580cb15291951812549dc", + None, + ), + "git+file:///Users/zoltan/src/pnpm/pnpm/resolving/git-resolver#988c61e11dc8d9ca0b5580cb15291951812549dc", + ); + } + + #[test] + fn bare_host_path_gains_https() { + assert_eq!( + create_git_hosted_pkg_id( + "github.com/foo/bar.git", + "0000000000000000000000000000000000000000", + None, + ), + "git+https://github.com/foo/bar.git#0000000000000000000000000000000000000000", + ); + } + + #[test] + fn appends_path() { + assert_eq!( + create_git_hosted_pkg_id( + "https://github.com/foo/bar.git", + "0000000000000000000000000000000000000000", + Some("/packages/sub"), + ), + "git+https://github.com/foo/bar.git#0000000000000000000000000000000000000000&path:/packages/sub", + ); + } +} diff --git a/pacquet/crates/resolving-git-resolver/src/git_resolver.rs b/pacquet/crates/resolving-git-resolver/src/git_resolver.rs new file mode 100644 index 0000000000..1f46a55975 --- /dev/null +++ b/pacquet/crates/resolving-git-resolver/src/git_resolver.rs @@ -0,0 +1,318 @@ +//! Pacquet port of pnpm's +//! [`createGitResolver`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/index.ts#L25-L102). +//! +//! [`GitResolver`] wires the parser, the host probe, and the +//! ls-remote runner into a single [`Resolver`] the dispatcher can +//! compose into the default-resolver chain. + +use std::sync::Arc; + +use pacquet_lockfile::{GitResolution, LockfileResolution, TarballResolution}; +use pacquet_resolving_resolver_base::{ + LatestInfo, LatestQuery, ResolveError, ResolveFuture, ResolveLatestFuture, ResolveOptions, + ResolveResult, Resolver, WantedDependency, +}; + +use crate::{ + create_git_hosted_pkg_id::create_git_hosted_pkg_id, + hosted_git::HostedOpts, + parse_bare_specifier::{GitProbe, HostedPackageSpec, parse_bare_specifier}, + resolve_ref::{GitCommandRunner, resolve_ref}, +}; + +/// Git resolver entry point. 
Holds the production network / git +/// runners shared across every per-dep `resolve()` call; tests +/// construct one with fake runners. +/// +/// `Arc` so the resolver can be cloned into the default-resolver +/// chain without forcing the runners (whose ownership lives on the +/// install dispatcher) into a single owner. +pub struct GitResolver { + probe: Arc, + runner: Arc, +} + +impl GitResolver { + pub fn new(probe: Arc, runner: Arc) -> Self { + Self { probe, runner } + } +} + +impl Resolver + for GitResolver +{ + fn resolve<'a>( + &'a self, + wanted_dependency: &'a WantedDependency, + opts: &'a ResolveOptions, + ) -> ResolveFuture<'a> { + Box::pin(self.resolve_impl(wanted_dependency, opts)) + } + + fn resolve_latest<'a>( + &'a self, + query: &'a LatestQuery, + opts: &'a ResolveOptions, + ) -> ResolveLatestFuture<'a> { + Box::pin(self.resolve_latest_impl(query, opts)) + } +} + +impl GitResolver { + async fn resolve_impl( + &self, + wanted_dependency: &WantedDependency, + _opts: &ResolveOptions, + ) -> Result, ResolveError> { + let Some(bare) = wanted_dependency.bare_specifier.as_deref() else { return Ok(None) }; + let Some(partial) = parse_bare_specifier(bare) else { return Ok(None) }; + let spec = partial.finalize(self.probe.as_ref()).await; + let result = + build_resolve_result(spec, self.runner.as_ref(), wanted_dependency.alias.as_deref()) + .await?; + Ok(Some(result)) + } + + /// Companion to [`Self::resolve_impl`]. + /// + /// Mirrors pnpm's + /// [`resolveLatestFromGit`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/index.ts#L108-L114): + /// claim every dep the parser recognises, but return an empty + /// [`LatestInfo`] (git has no uniform "latest" notion — a host's + /// tag list would be the closest proxy and the protocols disagree). 
+ async fn resolve_latest_impl( + &self, + query: &LatestQuery, + _opts: &ResolveOptions, + ) -> Result, ResolveError> { + let Some(bare) = query.wanted_dependency.bare_specifier.as_deref() else { + return Ok(None); + }; + if parse_bare_specifier(bare).is_none() { + return Ok(None); + } + Ok(Some(LatestInfo::default())) + } +} + +async fn build_resolve_result( + spec: HostedPackageSpec, + runner: &Runner, + alias: Option<&str>, +) -> Result { + let ref_for_ls_remote = match spec.git_committish.as_deref() { + Some(committish) if !committish.is_empty() => committish, + _ => "HEAD", + }; + let commit = + resolve_ref(runner, &spec.fetch_spec, ref_for_ls_remote, spec.git_range.as_deref()) + .await + .map_err(|err| Box::new(err) as ResolveError)?; + + let resolution = pick_resolution(&spec, &commit); + + let id_string = match &resolution { + LockfileResolution::Tarball(t) => { + let mut id = t.tarball.clone(); + if let Some(path) = &t.path { + id.push_str("#path:"); + id.push_str(path); + } + id + } + LockfileResolution::Git(g) => { + create_git_hosted_pkg_id(&g.repo, &g.commit, g.path.as_deref()) + } + _ => unreachable!("pick_resolution returns Tarball or Git only"), + }; + + Ok(ResolveResult { + id: id_string.into(), + name_ver: None, + latest: None, + published_at: None, + manifest: None, + resolution, + resolved_via: "git-repository".to_string(), + normalized_bare_specifier: Some(spec.normalized_bare_specifier), + alias: alias.map(str::to_string), + policy_violation: None, + }) +} + +/// Pick between a tarball and a git resolution. Mirrors the +/// `resolution = …` branch in upstream's +/// [`resolveFromGit`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/index.ts#L60-L83). +fn pick_resolution(spec: &HostedPackageSpec, commit: &str) -> LockfileResolution { + if let Some(hosted) = spec.hosted.as_ref() + && !is_ssh(&spec.fetch_spec) + { + // For hosted, non-ssh repos: produce a tarball URL the + // git-hosted tarball fetcher can pick up. 
Build it from a + // clone of the hosted struct with the resolved committish + // pinned in. + let mut hosted = hosted.clone(); + hosted.committish = Some(commit.to_string()); + if let Some(tarball) = hosted.tarball(HostedOpts::default()) { + return LockfileResolution::Tarball(TarballResolution { + tarball, + integrity: None, + git_hosted: Some(true), + path: spec.path.clone(), + }); + } + } + LockfileResolution::Git(GitResolution { + repo: spec.fetch_spec.clone(), + commit: commit.to_string(), + path: spec.path.clone(), + }) +} + +fn is_ssh(spec: &str) -> bool { + spec.starts_with("git+ssh://") || spec.starts_with("git@") +} + +#[cfg(test)] +mod tests { + use std::sync::{Arc, Mutex}; + use std::{future::Future, pin::Pin}; + + use pacquet_lockfile::LockfileResolution; + use pacquet_resolving_resolver_base::{ResolveOptions, Resolver, WantedDependency}; + + use super::{GitProbe, GitResolver}; + use crate::parse_bare_specifier::ProbeFuture; + use crate::resolve_ref::{GitCommandRunner, GitRunError}; + + struct FakeProbe { + head_ok: bool, + ls_ok: bool, + } + impl GitProbe for FakeProbe { + fn https_head_ok<'a>(&'a self, _url: &'a str) -> ProbeFuture<'a> { + let v = self.head_ok; + Box::pin(async move { v }) + } + fn ls_remote_exit_code<'a>(&'a self, _repo: &'a str) -> ProbeFuture<'a> { + let v = self.ls_ok; + Box::pin(async move { v }) + } + } + + struct FakeRunner { + stdout: String, + calls: Mutex)>>, + } + impl GitCommandRunner for FakeRunner { + fn ls_remote<'a>( + &'a self, + repo: &'a str, + ref_: Option<&'a str>, + ) -> Pin> + Send + 'a>> { + self.calls.lock().unwrap().push((repo.to_string(), ref_.map(str::to_string))); + let stdout = self.stdout.clone(); + Box::pin(async move { Ok(stdout) }) + } + } + + fn resolver(head_ok: bool, ls_ok: bool, stdout: &str) -> GitResolver { + GitResolver::new( + Arc::new(FakeProbe { head_ok, ls_ok }), + Arc::new(FakeRunner { stdout: stdout.to_string(), calls: Mutex::new(Vec::new()) }), + ) + } + + #[tokio::test] + async fn 
declines_non_git_specifier() { + let resolver = resolver(true, true, ""); + let wanted = WantedDependency { + alias: Some("foo".to_string()), + bare_specifier: Some("1.2.3".to_string()), + ..WantedDependency::default() + }; + assert!(resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().is_none()); + } + + #[tokio::test] + async fn github_shortcut_full_commit_returns_tarball() { + let resolver = resolver(true, true, ""); + let wanted = WantedDependency { + alias: None, + bare_specifier: Some( + "zkochan/is-negative#163360a8d3ae6bee9524541043197ff356f8ed99".to_string(), + ), + ..WantedDependency::default() + }; + let result = + resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().expect("claimed"); + assert_eq!(result.resolved_via, "git-repository"); + match result.resolution { + LockfileResolution::Tarball(t) => { + assert_eq!( + t.tarball, + "https://codeload.github.com/zkochan/is-negative/tar.gz/163360a8d3ae6bee9524541043197ff356f8ed99", + ); + assert_eq!(t.git_hosted, Some(true)); + assert!(t.path.is_none()); + } + other => panic!("expected Tarball, got {other:?}"), + } + assert_eq!( + result.id.as_str(), + "https://codeload.github.com/zkochan/is-negative/tar.gz/163360a8d3ae6bee9524541043197ff356f8ed99", + ); + assert_eq!( + result.normalized_bare_specifier.as_deref(), + Some("github:zkochan/is-negative#163360a8d3ae6bee9524541043197ff356f8ed99"), + ); + } + + #[tokio::test] + async fn ssh_url_falls_back_to_git_resolution() { + let stdout = "abcdef1234567890123456789012345678901234\tHEAD\n"; + // head_ok=false → first https branch fails; ls_ok=true → ssh branch wins. 
+ let resolver = resolver(false, true, stdout); + let wanted = WantedDependency { + alias: None, + bare_specifier: Some("git+ssh://git@example.com/org/repo.git#abcdef12".to_string()), + ..WantedDependency::default() + }; + let result = + resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().expect("claimed"); + match result.resolution { + LockfileResolution::Git(g) => { + assert_eq!(g.repo, "ssh://git@example.com/org/repo.git"); + assert_eq!(g.commit, "abcdef1234567890123456789012345678901234"); + assert!(g.path.is_none()); + } + other => panic!("expected Git, got {other:?}"), + } + // id is git+ssh:// shaped via create_git_hosted_pkg_id. + assert!(result.id.as_str().starts_with("git+ssh://git@example.com/org/repo.git#")); + } + + #[tokio::test] + async fn path_suffix_appended_to_id_and_resolution() { + let stdout = "1111111111111111111111111111111111111111\tHEAD\n"; + let resolver = resolver(true, true, stdout); + let wanted = WantedDependency { + alias: None, + bare_specifier: Some( + "github:RexSkz/test-git-subfolder-fetch#path:/packages/simple-react-app" + .to_string(), + ), + ..WantedDependency::default() + }; + let result = + resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().expect("claimed"); + match result.resolution { + LockfileResolution::Tarball(t) => { + assert_eq!(t.path.as_deref(), Some("/packages/simple-react-app")); + assert!(t.tarball.ends_with("/tar.gz/1111111111111111111111111111111111111111")); + } + other => panic!("expected Tarball, got {other:?}"), + } + assert!(result.id.as_str().ends_with("#path:/packages/simple-react-app")); + } +} diff --git a/pacquet/crates/resolving-git-resolver/src/hosted_git.rs b/pacquet/crates/resolving-git-resolver/src/hosted_git.rs new file mode 100644 index 0000000000..0f570d962e --- /dev/null +++ b/pacquet/crates/resolving-git-resolver/src/hosted_git.rs @@ -0,0 +1,873 @@ +//! Pacquet port of the subset of +//! 
[`hosted-git-info`](https://github.com/npm/hosted-git-info/tree/v4.1.0) +//! that pnpm's git resolver uses. v4.1.0 is the major pinned in pnpm's +//! root `package.json` (catalog entry `hosted-git-info: ^4.1.0`) and is +//! what `node_modules/hosted-git-info/` ships at the time of this port. +//! +//! Coverage: +//! +//! - [`HostedGit::from_url`] recognises the GitHub / GitLab / Bitbucket +//! shortcut forms (`github:owner/repo#ref`, the bare `owner/repo#ref` +//! shorthand, `gitlab:…`, `bitbucket:…`), full HTTPS/SSH URLs pointed +//! at the matching domains, and the `git+ssh:` / `git+https:` / `ssh:` / +//! `git:` / `http:` / `https:` protocol families. Anything else +//! (Gitea, self-hosted, generic git+file, …) returns `None` so the +//! caller can fall back to the protocol-prefix dispatch in +//! [`crate::parse_bare_specifier()`]. +//! - The output exposes the URL templates the resolver consumes — +//! `https`, `ssh`, `sshurl`, `tarball`, `shortcut` — each accepting +//! the `no_committish` / `no_git_plus` flags upstream's `_fill` +//! threads in. +//! +//! Deliberate deviations from upstream: +//! +//! - The GitLab tarball template emits `/-/archive//-.tar.gz` +//! directly, matching pnpm's [`gitlabTarballTemplate`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L137-L140) +//! override (fix #11533). Upstream hosted-git-info still emits the +//! `/api/v4/projects/%2F/repository/archive.tar.gz` +//! form; pacquet ports the pnpm override, not the raw template. +//! - The `gist` host is not implemented. pnpm's test suite never +//! exercises it and the install path has no gist-shaped store key. +//! - `browse` / `bugs` / `docs` / `file` / `git` templates are not +//! implemented — only `https` / `ssh` / `sshurl` / `tarball` / +//! `shortcut` are used by the resolver. + +use std::fmt::{self, Write}; + +/// Three host families pacquet recognises. Mirrors upstream's +/// `gitHosts` keys at +/// . 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum HostedGitType { + Github, + Gitlab, + Bitbucket, +} + +impl HostedGitType { + fn domain(self) -> &'static str { + match self { + HostedGitType::Github => "github.com", + HostedGitType::Gitlab => "gitlab.com", + HostedGitType::Bitbucket => "bitbucket.org", + } + } + + fn shortcut_prefix(self) -> &'static str { + match self { + HostedGitType::Github => "github", + HostedGitType::Gitlab => "gitlab", + HostedGitType::Bitbucket => "bitbucket", + } + } + + fn from_shortcut(scheme: &str) -> Option { + match scheme { + "github" => Some(HostedGitType::Github), + "gitlab" => Some(HostedGitType::Gitlab), + "bitbucket" => Some(HostedGitType::Bitbucket), + _ => None, + } + } + + fn from_domain(host: &str) -> Option { + // Strip leading `www.` to match upstream's + // `parsed.hostname.startsWith('www.') ? parsed.hostname.slice(4) : parsed.hostname`. + let host = host.strip_prefix("www.").unwrap_or(host); + match host { + "github.com" => Some(HostedGitType::Github), + "gitlab.com" => Some(HostedGitType::Gitlab), + "bitbucket.org" => Some(HostedGitType::Bitbucket), + _ => None, + } + } + + fn supports_protocol(self, proto: &str) -> bool { + match self { + // gitHosts.github.protocols + HostedGitType::Github => { + matches!(proto, "git" | "http" | "git+ssh" | "git+https" | "ssh" | "https") + } + // gitHosts.gitlab.protocols and gitHosts.bitbucket.protocols + HostedGitType::Gitlab | HostedGitType::Bitbucket => { + matches!(proto, "git+ssh" | "git+https" | "ssh" | "https") + } + } + } +} + +/// Parsed git host info. Mirrors upstream's `GitHost` instance fields +/// (sans the unused `default` / `opts` slots). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct HostedGit { + pub host_type: HostedGitType, + pub user: String, + pub auth: Option, + pub project: String, + pub committish: Option, + /// The original protocol the URL came in with. 
Drives the + /// "default representation" upstream picks for `toString` / + /// `shortcut` round-trips. + default_representation: Representation, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Representation { + Shortcut, + Sshurl, + Https, + Git, + Http, +} + +/// Per-call options for the `_fill`-style URL templates. +#[derive(Debug, Default, Clone, Copy)] +pub struct HostedOpts { + /// Drop the trailing `#` segment. + pub no_committish: bool, + /// Strip the leading `git+` from `https` / `ssh` outputs. + pub no_git_plus: bool, +} + +impl HostedGit { + /// Convenience: build options that omit the committish. + pub fn no_committish() -> HostedOpts { + HostedOpts { no_committish: true, no_git_plus: false } + } + + /// Convenience: drop both `#commit` and the `git+` prefix. + pub fn no_committish_no_git_plus() -> HostedOpts { + HostedOpts { no_committish: true, no_git_plus: true } + } +} + +impl HostedGit { + /// Recognise a git URL the way upstream's + /// [`fromUrl`](https://github.com/npm/hosted-git-info/blob/v4.1.0/index.js#L29-L41) + /// does. + /// + /// Returns `None` when the input names a host pacquet doesn't + /// recognise (Gitea, self-hosted GitLab, a generic + /// `git+file://…`, …), the project is missing, or the URL parses + /// to an unsupported shape (e.g. a bitbucket `/get/…` archive + /// URL — upstream's `extract` returns undefined for those and + /// pacquet mirrors it). + pub fn from_url(giturl: &str) -> Option { + if giturl.is_empty() { + return None; + } + // GitHub shorthand: prepend `github:` and run through the + // shortcut path. Mirrors upstream's + // `isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl)`. 
+ let owned; + let normalised: &str = if is_github_shorthand(giturl) { + owned = format!("github:{giturl}"); + &owned + } else { + owned = correct_protocol(giturl); + &owned + }; + + let parsed = parse_git_url(normalised)?; + // Look up host: shortcut first (so `github://...` wins over the + // host's full URL parsing), then by domain. + let shortcut_type = HostedGitType::from_shortcut(&parsed.scheme); + let domain_type = parsed.host.as_deref().and_then(HostedGitType::from_domain); + let host_type = shortcut_type.or(domain_type)?; + + let auth_protocols = + matches!(parsed.scheme.as_str(), "git" | "https" | "git+https" | "http" | "git+http"); + let auth = if auth_protocols && (parsed.username.is_some() || parsed.password.is_some()) { + let user = parsed.username.as_deref().unwrap_or(""); + if let Some(pw) = parsed.password.as_deref() { + Some(format!("{user}:{pw}")) + } else { + Some(user.to_string()) + } + } else { + None + }; + + let (user, project, committish, default_representation) = if shortcut_type.is_some() { + // Shortcut form: pull user/project out of the opaque + // path. Matches upstream's shortcut branch verbatim. + let mut pathname = parsed.pathname.as_str(); + pathname = pathname.strip_prefix('/').unwrap_or(pathname); + // Strip auth from the path. Upstream notes "we ignore auth + // for shortcuts, so just trim it out". 
+ if let Some(at) = pathname.find('@') { + pathname = &pathname[at + 1..]; + } + let (user, project) = match pathname.rfind('/') { + Some(idx) => { + let user = percent_decode(&pathname[..idx]); + let project = percent_decode(&pathname[idx + 1..]); + let user = if user.is_empty() { None } else { Some(user) }; + (user, project) + } + None => (None, percent_decode(pathname)), + }; + let project = strip_dot_git(&project); + let committish = parsed + .hash + .as_ref() + .map(|hash| percent_decode(hash.strip_prefix('#').unwrap_or(hash))) + .filter(|committish| !committish.is_empty()); + let user = user.unwrap_or_default(); + (user, project, committish, Representation::Shortcut) + } else { + if !host_type.supports_protocol(&parsed.scheme) { + return None; + } + let segments = extract_for_host(host_type, &parsed)?; + let user = percent_decode(&segments.user); + let project = percent_decode(&segments.project); + let committish = segments + .committish + .map(|raw| percent_decode(&raw)) + .filter(|decoded| !decoded.is_empty()); + let representation = protocol_to_representation(&parsed.scheme); + (user, project, committish, representation) + }; + + if project.is_empty() { + return None; + } + + Some(HostedGit { host_type, user, auth, project, committish, default_representation }) + } + + /// Shorthand `:/[#committish]`. Mirrors + /// upstream's `shortcuttemplate`. + pub fn shortcut(&self, opts: HostedOpts) -> String { + let mut out = + format!("{}:{}/{}", self.host_type.shortcut_prefix(), self.user, self.project); + if !opts.no_committish + && let Some(ref c) = self.committish + { + out.push('#'); + out.push_str(c); + } + out + } + + /// `git+https://[auth@]//.git[#committish]`, + /// optionally stripped of `git+`. Mirrors upstream's + /// `httpstemplate` (gitlab and github share the same shape). 
+ pub fn https(&self, opts: HostedOpts) -> Option { + let auth = self.auth.as_deref().map(|a| format!("{a}@")).unwrap_or_default(); + let mut out = format!( + "git+https://{auth}{domain}/{user}/{project}.git", + domain = self.host_type.domain(), + user = self.user, + project = self.project, + ); + if !opts.no_committish + && let Some(ref c) = self.committish + { + out.push('#'); + out.push_str(c); + } + if opts.no_git_plus + && let Some(stripped) = out.strip_prefix("git+") + { + out = stripped.to_string(); + } + Some(out) + } + + /// `git@:/.git[#committish]`. Mirrors + /// upstream's `sshtemplate`. + pub fn ssh(&self, opts: HostedOpts) -> Option { + let mut out = format!( + "git@{domain}:{user}/{project}.git", + domain = self.host_type.domain(), + user = self.user, + project = self.project, + ); + if !opts.no_committish + && let Some(ref c) = self.committish + { + out.push('#'); + out.push_str(c); + } + Some(out) + } + + /// `git+ssh://git@//.git[#committish]`. + /// Mirrors upstream's `sshurltemplate`. + pub fn sshurl(&self, opts: HostedOpts) -> Option { + let mut out = format!( + "git+ssh://git@{domain}/{user}/{project}.git", + domain = self.host_type.domain(), + user = self.user, + project = self.project, + ); + if !opts.no_committish + && let Some(ref c) = self.committish + { + out.push('#'); + out.push_str(c); + } + if opts.no_git_plus + && let Some(stripped) = out.strip_prefix("git+") + { + out = stripped.to_string(); + } + Some(out) + } + + /// Host-specific tarball URL. Mirrors upstream's `tarballtemplate` + /// per host, with one deviation: GitLab uses the + /// `/-/archive//-.tar.gz` shape pnpm overrides + /// the upstream template with at + /// [parseBareSpecifier.ts:137-140](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L137-L140) + /// (fix #11533). 
+ /// + /// Returns `None` when no committish is set — every supported host + /// uses an explicit ref or the literal `HEAD` / `master` placeholder + /// from upstream's template. Pacquet only ever invokes + /// `tarball()` after [`crate::resolve_ref::resolve_ref`] has pinned + /// the commit, so the `None` here is precautionary. + pub fn tarball(&self, opts: HostedOpts) -> Option { + // Upstream `tarball()` overrides `noCommittish: false`; even + // when the caller asks to drop the committish elsewhere, the + // tarball needs a ref. Pacquet mirrors that policy: ignore + // `opts.no_committish` here. + let _ = opts; + let committish = self.committish.as_deref()?; + let encoded_committish = encode_uri_component(committish); + Some(match self.host_type { + HostedGitType::Github => format!( + "https://codeload.github.com/{user}/{project}/tar.gz/{ref}", + user = self.user, + project = self.project, + r#ref = encoded_committish, + ), + HostedGitType::Bitbucket => format!( + "https://bitbucket.org/{user}/{project}/get/{ref}.tar.gz", + user = self.user, + project = self.project, + r#ref = encoded_committish, + ), + HostedGitType::Gitlab => format!( + "https://gitlab.com/{user}/{project}/-/archive/{ref}/{project}-{ref}.tar.gz", + user = self.user, + project = self.project, + r#ref = encoded_committish, + ), + }) + } +} + +impl fmt::Display for HostedGit { + /// Mirrors upstream's `toString`: emit the URL form matching the + /// default representation; fall back to `sshurl` when the default + /// isn't a render-able URL (e.g. `shortcut`). 
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let opts = HostedOpts::default(); + let rendered = match self.default_representation { + Representation::Sshurl => self.sshurl(opts), + Representation::Https | Representation::Http => self.https(opts), + Representation::Git => self.https(opts), + Representation::Shortcut => Some(self.shortcut(opts)), + }; + let rendered = rendered.unwrap_or_else(|| self.shortcut(opts)); + f.write_str(&rendered) + } +} + +fn protocol_to_representation(protocol: &str) -> Representation { + match protocol { + "git+ssh" | "ssh" => Representation::Sshurl, + "git+https" => Representation::Https, + "git" => Representation::Git, + "https" => Representation::Https, + "http" => Representation::Http, + _ => Representation::Sshurl, + } +} + +fn strip_dot_git(project: &str) -> String { + project.strip_suffix(".git").unwrap_or(project).to_string() +} + +struct ParsedUrl { + scheme: String, + username: Option, + password: Option, + host: Option, + pathname: String, + hash: Option, +} + +/// Attempt `Url::parse`; if it fails, run upstream's `correctUrl` +/// (handles `scheme://user@host:path` SCP-style URLs) and try again. +/// Mirrors upstream's +/// [`parseGitUrl`](https://github.com/npm/hosted-git-info/blob/v4.1.0/index.js#L221-L237). +fn parse_git_url(giturl: &str) -> Option { + if let Some(parsed) = whatwg_parse(giturl) { + return Some(parsed); + } + whatwg_parse(&correct_url(giturl)) +} + +/// Convert a `url::Url` (via `reqwest::Url`) into the same fields +/// hosted-git-info reads off Node's `URL`. Falls back to a manual +/// split for non-standard schemes the `url` crate refuses (rare). 
+fn whatwg_parse(giturl: &str) -> Option { + let parsed = reqwest::Url::parse(giturl).ok()?; + let scheme = parsed.scheme().to_string(); + let username = + if parsed.username().is_empty() { None } else { Some(parsed.username().to_string()) }; + let password = parsed.password().map(str::to_string); + let host = parsed.host_str().map(str::to_string); + let pathname = if parsed.cannot_be_a_base() { + // Non-base URLs (e.g. `github:owner/repo`) keep the whole + // post-scheme tail in `path()`. + parsed.path().to_string() + } else { + parsed.path().to_string() + }; + let hash = parsed.fragment().map(|f| format!("#{f}")); + Some(ParsedUrl { scheme, username, password, host, pathname, hash }) +} + +/// Mirrors upstream's +/// [`correctProtocol`](https://github.com/npm/hosted-git-info/blob/v4.1.0/index.js#L130-L152): +/// for inputs that already use a known scheme, return as-is; for +/// `user@host:path` SCP-style strings, prepend `git+ssh://`; otherwise, +/// insert the missing `//` after the first colon. Pacquet mirrors the +/// `knownProtocols` set (`github:`, `gitlab:`, `bitbucket:`, `http:`, +/// `https:`, `git:`, `git+ssh:`, `git+https:`, `ssh:`). +fn correct_protocol(input: &str) -> String { + let Some(first_colon) = input.find(':') else { + return input.to_string(); + }; + let proto = &input[..=first_colon]; + const KNOWN: &[&str] = &[ + "github:", + "gitlab:", + "bitbucket:", + "http:", + "https:", + "git:", + "git+ssh:", + "git+https:", + "ssh:", + ]; + if KNOWN.contains(&proto) { + return input.to_string(); + } + if let Some(first_at) = input.find('@') { + if first_at > first_colon { + return format!("git+ssh://{input}"); + } + return input.to_string(); + } + if let Some(double_slash) = input.find("//") + && double_slash == first_colon + 1 + { + return input.to_string(); + } + format!("{}//{}", &input[..=first_colon], &input[first_colon + 1..]) +} + +/// SCP-style URL repair. 
Mirrors upstream's +/// [`correctUrl`](https://github.com/npm/hosted-git-info/blob/v4.1.0/index.js#L183-L216). +fn correct_url(giturl: &str) -> String { + let first_at = giturl.find('@'); + let last_hash = giturl.rfind('#'); + let _first_colon = giturl.find(':'); + let upper_bound = last_hash.unwrap_or(giturl.len()); + let last_colon = giturl[..upper_bound].rfind(':'); + + let mut corrected = giturl.to_string(); + if let (Some(last_colon), Some(first_at)) = (last_colon, first_at) + && last_colon > first_at + { + corrected = format!("{}/{}", &giturl[..last_colon], &giturl[last_colon + 1..]); + } else if first_at.is_some() && last_colon.is_some() { + // first_at >= last_colon: leave as-is + } + + let first_colon = corrected.find(':'); + if first_colon.is_none() && !corrected.contains("//") { + corrected = format!("git+ssh://{corrected}"); + } + corrected +} + +/// `isGitHubShorthand` from upstream. Detects the bare `owner/repo` +/// form that pnpm registers as a github short link. +fn is_github_shorthand(arg: &str) -> bool { + // empty input is not a shortcut + if arg.is_empty() { + return false; + } + let first_hash = arg.find('#'); + let first_slash = arg.find('/'); + let second_slash = + first_slash.and_then(|first| arg[first + 1..].find('/').map(|rest| first + 1 + rest)); + let first_colon = arg.find(':'); + let first_space = arg.find(|ch: char| ch.is_whitespace()); + let first_at = arg.find('@'); + + let space_only_after_hash = first_space.is_none() + || (first_hash.is_some() && first_space.unwrap() > first_hash.unwrap()); + let at_only_after_hash = + first_at.is_none() || (first_hash.is_some() && first_at.unwrap() > first_hash.unwrap()); + let colon_only_after_hash = first_colon.is_none() + || (first_hash.is_some() && first_colon.unwrap() > first_hash.unwrap()); + let second_slash_only_after_hash = second_slash.is_none() + || (first_hash.is_some() && second_slash.unwrap() > first_hash.unwrap()); + let has_slash = first_slash.is_some_and(|first| first > 0); + 
let does_not_end_with_slash = match first_hash { + Some(hash) if hash > 0 => arg.as_bytes()[hash - 1] != b'/', + _ => !arg.ends_with('/'), + }; + let does_not_start_with_dot = !arg.starts_with('.'); + + space_only_after_hash + && has_slash + && does_not_end_with_slash + && does_not_start_with_dot + && at_only_after_hash + && colon_only_after_hash + && second_slash_only_after_hash +} + +struct Segments { + user: String, + project: String, + committish: Option, +} + +fn extract_for_host(host: HostedGitType, parsed: &ParsedUrl) -> Option { + match host { + HostedGitType::Github => extract_github(parsed), + HostedGitType::Bitbucket => extract_bitbucket(parsed), + HostedGitType::Gitlab => extract_gitlab(parsed), + } +} + +/// Port of `gitHosts.github.extract`. +fn extract_github(parsed: &ParsedUrl) -> Option { + let path = parsed.pathname.trim_start_matches('/'); + let mut parts = path.splitn(4, '/'); + let user = parts.next()?.to_string(); + let mut project = parts.next()?.to_string(); + let r#type = parts.next().map(str::to_string); + let mut committish = parts.next().map(str::to_string); + + if let Some(ref t) = r#type + && t != "tree" + { + return None; + } + + if r#type.is_none() { + committish = + parsed.hash.as_deref().map(|hash| hash.strip_prefix('#').unwrap_or(hash).to_string()); + } + + if project.ends_with(".git") { + project = project[..project.len() - 4].to_string(); + } + + if user.is_empty() || project.is_empty() { + return None; + } + + Some(Segments { user, project, committish }) +} + +/// Port of `gitHosts.bitbucket.extract`. 
+fn extract_bitbucket(parsed: &ParsedUrl) -> Option { + let path = parsed.pathname.trim_start_matches('/'); + let mut parts = path.splitn(4, '/'); + let user = parts.next()?.to_string(); + let mut project = parts.next()?.to_string(); + let aux = parts.next().map(str::to_string); + + if aux.as_deref() == Some("get") { + return None; + } + if project.ends_with(".git") { + project = project[..project.len() - 4].to_string(); + } + if user.is_empty() || project.is_empty() { + return None; + } + let committish = parsed + .hash + .as_deref() + .map(|hash| hash.strip_prefix('#').unwrap_or(hash).to_string()) + .filter(|committish| !committish.is_empty()); + Some(Segments { user, project, committish }) +} + +/// Port of `gitHosts.gitlab.extract`. +fn extract_gitlab(parsed: &ParsedUrl) -> Option { + let path = parsed.pathname.trim_start_matches('/').to_string(); + if path.contains("/-/") || path.contains("/archive.tar.gz") { + return None; + } + let mut segments: Vec<&str> = path.split('/').collect(); + let mut project = segments.pop()?.to_string(); + if project.ends_with(".git") { + project = project[..project.len() - 4].to_string(); + } + let user = segments.join("/"); + if user.is_empty() || project.is_empty() { + return None; + } + let committish = parsed + .hash + .as_deref() + .map(|hash| hash.strip_prefix('#').unwrap_or(hash).to_string()) + .filter(|committish| !committish.is_empty()); + Some(Segments { user, project, committish }) +} + +/// Match Node's `decodeURIComponent` for the inputs hosted-git-info +/// sees: percent-decode standard escapes, leave malformed sequences +/// alone (upstream surfaces them via `URIError`, which falls through to +/// returning `None` at the call site). Pacquet keeps the input as-is on +/// malformed input — the affected URLs are caught elsewhere when the +/// downstream parse fails. +fn percent_decode(input: &str) -> String { + // Decode `%XX` triples to raw bytes first, then reassemble as + // UTF-8 so multibyte sequences (e.g. 
a `%E2%80%A6` ellipsis) are + // reconstructed correctly. Fall back to the original input if the + // resulting byte stream isn't valid UTF-8 — that matches Node's + // `decodeURIComponent` throwing a `URIError`, which upstream's + // `try/catch` in `hosted-git-info`'s `fromUrl` swallows. + let mut buf: Vec = Vec::with_capacity(input.len()); + let bytes = input.as_bytes(); + let mut idx = 0; + while idx < bytes.len() { + if bytes[idx] == b'%' + && idx + 2 < bytes.len() + && let (Some(hi), Some(lo)) = + ((bytes[idx + 1] as char).to_digit(16), (bytes[idx + 2] as char).to_digit(16)) + { + buf.push((hi * 16 + lo) as u8); + idx += 3; + continue; + } + buf.push(bytes[idx]); + idx += 1; + } + String::from_utf8(buf).unwrap_or_else(|_| input.to_string()) +} + +/// Match Node's `encodeURIComponent`. Percent-encode every byte +/// outside the safe ASCII set Node keeps unencoded: +/// `A-Z a-z 0-9 - _ . ! ~ * ' ( )`. +fn encode_uri_component(input: &str) -> String { + let mut out = String::with_capacity(input.len()); + for byte in input.bytes() { + let safe = byte.is_ascii_alphanumeric() + || matches!(byte, b'-' | b'_' | b'.' | b'!' 
| b'~' | b'*' | b'\'' | b'(' | b')'); + if safe { + out.push(byte as char); + } else { + write!(&mut out, "%{byte:02X}").expect("write to String never fails"); + } + } + out +} + +#[cfg(test)] +mod tests { + use super::{HostedGit, HostedGitType, HostedOpts}; + + #[test] + fn github_shortcut_user_repo() { + let h = HostedGit::from_url("zkochan/is-negative").expect("recognised"); + assert_eq!(h.host_type, HostedGitType::Github); + assert_eq!(h.user, "zkochan"); + assert_eq!(h.project, "is-negative"); + assert_eq!(h.committish, None); + } + + #[test] + fn github_shortcut_with_commit() { + let h = HostedGit::from_url("zkochan/is-negative#163360a8d3ae6bee9524541043197ff356f8ed99") + .expect("recognised"); + assert_eq!(h.committish.as_deref(), Some("163360a8d3ae6bee9524541043197ff356f8ed99")); + } + + #[test] + fn github_colon_shortcut() { + let h = HostedGit::from_url("github:zkochan/is-negative#canary").expect("recognised"); + assert_eq!(h.host_type, HostedGitType::Github); + assert_eq!(h.user, "zkochan"); + assert_eq!(h.project, "is-negative"); + assert_eq!(h.committish.as_deref(), Some("canary")); + } + + #[test] + fn https_full_url() { + let h = + HostedGit::from_url("https://github.com/zkochan/is-negative.git#2.0.1").expect("ok"); + assert_eq!(h.host_type, HostedGitType::Github); + assert_eq!(h.user, "zkochan"); + assert_eq!(h.project, "is-negative"); + assert_eq!(h.committish.as_deref(), Some("2.0.1")); + } + + #[test] + fn git_plus_ssh_url() { + let h = HostedGit::from_url("git+ssh://git@github.com/zkochan/is-negative.git#2.0.1") + .expect("ok"); + assert_eq!(h.user, "zkochan"); + assert_eq!(h.project, "is-negative"); + } + + #[test] + fn bitbucket_shortcut() { + let h = HostedGit::from_url("bitbucket:pnpmjs/git-resolver#0.3.4").expect("ok"); + assert_eq!(h.host_type, HostedGitType::Bitbucket); + assert_eq!(h.user, "pnpmjs"); + assert_eq!(h.project, "git-resolver"); + assert_eq!(h.committish.as_deref(), Some("0.3.4")); + } + + #[test] + fn gitlab_shortcut() { + 
let h = HostedGit::from_url("gitlab:pnpm/git-resolver").expect("ok"); + assert_eq!(h.host_type, HostedGitType::Gitlab); + assert_eq!(h.user, "pnpm"); + assert_eq!(h.project, "git-resolver"); + } + + #[test] + fn https_gitlab_url() { + let h = HostedGit::from_url("https://gitlab.com/pnpmjs/git-resolver").expect("ok"); + assert_eq!(h.host_type, HostedGitType::Gitlab); + assert_eq!(h.user, "pnpmjs"); + assert_eq!(h.project, "git-resolver"); + } + + #[test] + fn rejects_non_hosted() { + // Gitea / generic .git URLs are not recognised by hosted-git-info. + assert!( + HostedGit::from_url("https://gitea.osmocom.org/ttcn3/highlightjs-ttcn3.git").is_none(), + ); + } + + #[test] + fn rejects_random_string() { + assert!(HostedGit::from_url("not-a-url").is_none()); + assert!(HostedGit::from_url("").is_none()); + } + + #[test] + fn rejects_relative_path() { + // Starts with `.`, fails isGitHubShorthand. + assert!(HostedGit::from_url("./local-dep").is_none()); + } + + #[test] + fn shortcut_render() { + let h = HostedGit::from_url("zkochan/is-negative#163360a8d3ae6bee9524541043197ff356f8ed99") + .expect("ok"); + assert_eq!( + h.shortcut(HostedOpts::default()), + "github:zkochan/is-negative#163360a8d3ae6bee9524541043197ff356f8ed99", + ); + assert_eq!(h.shortcut(HostedGit::no_committish()), "github:zkochan/is-negative"); + } + + #[test] + fn https_render_with_commit() { + let h = HostedGit::from_url("zkochan/is-negative").expect("ok"); + assert_eq!( + h.https(HostedOpts::default()).unwrap(), + "git+https://github.com/zkochan/is-negative.git", + ); + assert_eq!( + h.https(HostedGit::no_committish_no_git_plus()).unwrap(), + "https://github.com/zkochan/is-negative.git", + ); + } + + #[test] + fn ssh_render() { + let h = HostedGit::from_url("foo/bar").expect("ok"); + assert_eq!(h.ssh(HostedOpts::default()).unwrap(), "git@github.com:foo/bar.git"); + assert_eq!( + h.sshurl(HostedOpts::default()).unwrap(), + "git+ssh://git@github.com/foo/bar.git", + ); + assert_eq!( + 
h.sshurl(HostedGit::no_committish()).unwrap(), + "git+ssh://git@github.com/foo/bar.git", + ); + } + + #[test] + fn tarball_github() { + let mut h = HostedGit::from_url("zkochan/is-negative").expect("ok"); + h.committish = Some("163360a8d3ae6bee9524541043197ff356f8ed99".to_string()); + assert_eq!( + h.tarball(HostedOpts::default()).unwrap(), + "https://codeload.github.com/zkochan/is-negative/tar.gz/163360a8d3ae6bee9524541043197ff356f8ed99", + ); + } + + #[test] + fn tarball_bitbucket() { + let mut h = HostedGit::from_url("bitbucket:foo/bar").expect("ok"); + h.committish = Some("abc123".to_string()); + assert_eq!( + h.tarball(HostedOpts::default()).unwrap(), + "https://bitbucket.org/foo/bar/get/abc123.tar.gz", + ); + } + + #[test] + fn tarball_gitlab_uses_archive_path() { + // Regression for pnpm #11533: the tarball must not embed + // `%2F`. The `/-/archive//-.tar.gz` form + // doesn't. + let mut h = HostedGit::from_url("gitlab:pnpmjs/git-resolver").expect("ok"); + h.committish = Some("988c61e11dc8d9ca0b5580cb15291951812549dc".to_string()); + let tarball = h.tarball(HostedOpts::default()).unwrap(); + assert!(!tarball.contains("%2F"), "tarball must not contain `%2F`: {tarball}"); + assert_eq!( + tarball, + "https://gitlab.com/pnpmjs/git-resolver/-/archive/988c61e11dc8d9ca0b5580cb15291951812549dc/git-resolver-988c61e11dc8d9ca0b5580cb15291951812549dc.tar.gz", + ); + } + + #[test] + fn tarball_returns_none_when_no_committish() { + let h = HostedGit::from_url("zkochan/is-negative").expect("ok"); + assert!(h.tarball(HostedOpts::default()).is_none()); + } + + #[test] + fn https_with_auth() { + let h = HostedGit::from_url("git+https://0000000000000000000000000000000000000000:x-oauth-basic@github.com/foo/bar.git").expect("ok"); + assert_eq!( + h.https(HostedGit::no_committish_no_git_plus()).unwrap(), + "https://0000000000000000000000000000000000000000:x-oauth-basic@github.com/foo/bar.git", + ); + assert!(h.auth.is_some()); + } + + #[test] + fn 
percent_decode_reassembles_utf8_sequences() { + // `%E2%80%A6` is U+2026 (ellipsis) in UTF-8. A byte-wise + // decoder would emit two Latin-1 chars; a UTF-8-aware decoder + // restores the original ellipsis. + assert_eq!(super::percent_decode("a%E2%80%A6b"), "a\u{2026}b"); + // Branch / tag with a percent-encoded scope-style slash + // (`@foo/bar` → `%40foo%2Fbar`). + assert_eq!(super::percent_decode("%40foo%2Fbar"), "@foo/bar"); + } +} diff --git a/pacquet/crates/resolving-git-resolver/src/lib.rs b/pacquet/crates/resolving-git-resolver/src/lib.rs new file mode 100644 index 0000000000..08222fcb15 --- /dev/null +++ b/pacquet/crates/resolving-git-resolver/src/lib.rs @@ -0,0 +1,50 @@ +//! Pacquet port of pnpm's +//! [`@pnpm/resolving.git-resolver`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/index.ts). +//! +//! Resolves dependencies whose `bareSpecifier` names a git repository: +//! the GitHub / GitLab / Bitbucket short-hands (`github:owner/repo#ref`, +//! `gitlab:…`, `bitbucket:…`, the bare `owner/repo` form), git-scheme +//! URLs (`git+ssh`, `git+https`, `git+file`, plain `ssh`, …), and the +//! plain `https://host/repo.git[#ref]` shape some hosts (Gitea, …) +//! serve. +//! +//! Three pieces: +//! +//! - [`create_git_hosted_pkg_id()`] — pure ID builder for git resolutions. +//! Ports +//! [`createGitHostedPkgId.ts`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/createGitHostedPkgId.ts). +//! - [`parse_bare_specifier()`] — recognise + normalise the input string, +//! resolve hosted-vs-private (HTTP HEAD probe + `git ls-remote --exit-code` +//! reachability check), pick a `fetchSpec`. Ports +//! [`parseBareSpecifier.ts`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts). +//! - [`GitResolver`] — the [`Resolver`](pacquet_resolving_resolver_base::Resolver) +//! impl that drives the two above, runs `git ls-remote` to pin a +//! 
commit, and emits either a `Tarball{gitHosted: true}` or `Git` +//! resolution. Ports +//! [`index.ts`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/index.ts). +//! +//! Out of scope: +//! +//! - The `prev_specifier` short-circuit (upstream's `currentPkg && !update` +//! branch). Pacquet doesn't thread `currentPkg` through the seam yet +//! — the resolver always re-runs `ls-remote`. Restore the fast path +//! when `currentPkg` lands on `ResolveOptions`. +//! - Proxy / TLS plumbing on the HTTP HEAD probe — the probe uses the +//! default [`pacquet_network::ThrottledClient`], same as the rest of +//! the install path. + +mod create_git_hosted_pkg_id; +mod git_resolver; +mod hosted_git; +mod parse_bare_specifier; +mod resolve_ref; +mod runners; + +pub use create_git_hosted_pkg_id::create_git_hosted_pkg_id; +pub use git_resolver::GitResolver; +pub use hosted_git::{HostedGit, HostedGitType, HostedOpts}; +pub use parse_bare_specifier::{ + GitProbe, HostedPackageSpec, PartialSpec, ProbeFuture, parse_bare_specifier, +}; +pub use resolve_ref::{GitCommandRunner, GitResolveRefError, GitRunError, resolve_ref}; +pub use runners::{RealGitProbe, RealGitRunner}; diff --git a/pacquet/crates/resolving-git-resolver/src/parse_bare_specifier.rs b/pacquet/crates/resolving-git-resolver/src/parse_bare_specifier.rs new file mode 100644 index 0000000000..ede396c69a --- /dev/null +++ b/pacquet/crates/resolving-git-resolver/src/parse_bare_specifier.rs @@ -0,0 +1,594 @@ +//! Ports pnpm's +//! [`parseBareSpecifier.ts`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts). +//! +//! Two-phase API mirrors the upstream split between the sync +//! protocol-prefix dispatch and the async hosted-repo probe: +//! +//! * [`parse_bare_specifier`] runs the synchronous part. Returns +//! `None` when the input isn't a git-shaped specifier (so the +//! resolver chain falls through to the next resolver). +//! 
* [`PartialSpec::finalize`] runs the async part. For hosted +//! specs it picks between https / ssh based on the +//! [`GitProbe`] callbacks (HTTP HEAD + `git ls-remote --exit-code`); +//! for protocol-prefix specs the spec is already complete and the +//! probe is unused. + +use std::{future::Future, pin::Pin}; + +use crate::hosted_git::{HostedGit, HostedOpts}; + +/// Fully resolved spec consumed by [`crate::git_resolver::GitResolver`]. +/// +/// Mirrors upstream's +/// [`HostedPackageSpec`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L8-L21). +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct HostedPackageSpec { + /// URL passed to `git ls-remote`. Always carries no committish — + /// the committish lives in [`Self::git_committish`] / + /// [`Self::git_range`]. + pub fetch_spec: String, + /// Original `HostedGit` parse, when the input matched a known + /// host. Drives [`crate::GitResolver`]'s tarball vs git-resolution + /// decision. + pub hosted: Option, + /// What the resolver echoes back to the manifest as + /// `normalizedBareSpecifier`. For hosted inputs this is the + /// `shortcut()` form; for protocol-prefix inputs it is the + /// caller's original bare specifier. + pub normalized_bare_specifier: String, + pub git_committish: Option, + pub git_range: Option, + pub path: Option, +} + +/// Output of the sync prefilter [`parse_bare_specifier`]. +pub enum PartialSpec { + /// Hosted input: needs an async probe to decide https/ssh routing. + Hosted(HostedGit), + /// Protocol-prefix input: already finalised, no probe needed. + Direct(HostedPackageSpec), +} + +impl PartialSpec { + /// Drive the async leg. For [`PartialSpec::Direct`] the probe is + /// ignored. + pub async fn finalize(self, probe: &Probe) -> HostedPackageSpec { + match self { + PartialSpec::Direct(spec) => spec, + PartialSpec::Hosted(hosted) => from_hosted_git(hosted, probe).await, + } + } +} + +/// Boxed-future return type used by [`GitProbe`]. 
/// Boxed-future return type used by [`GitProbe`]. Same shape as the
/// rest of pacquet's async traits (see `ResolveFuture`).
///
/// The future resolves to `true` when the probe succeeded.
pub type ProbeFuture<'a> = Pin<Box<dyn Future<Output = bool> + Send + 'a>>;

/// Capability seam for the network and git invocations the hosted
/// branch needs.
///
/// Real installs supply an implementation that issues an HTTP HEAD via
/// the install-wide [`pacquet_network::ThrottledClient`] and shells
/// out to `git ls-remote --exit-code`. Tests supply a fake that
/// records calls and yields canned values without touching the
/// network or the system git binary.
pub trait GitProbe: Send + Sync {
    /// `true` when an HTTP HEAD to the given URL returned a 2xx /
    /// 3xx. Used to detect public repos before running `git ls-remote`
    /// (which would otherwise prompt for credentials on a private
    /// repo). Mirrors upstream's
    /// [`isRepoPublic`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L142-L149).
    fn https_head_ok<'a>(&'a self, url: &'a str) -> ProbeFuture<'a>;

    /// `true` when `git ls-remote --exit-code <repo> HEAD` exited zero.
    /// Used as a reachability test on both the https and ssh
    /// candidates. Mirrors upstream's
    /// [`accessRepository`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L151-L158).
    fn ls_remote_exit_code<'a>(&'a self, repo: &'a str) -> ProbeFuture<'a>;
}
+pub fn parse_bare_specifier(bare: &str) -> Option { + if let Some(hosted) = HostedGit::from_url(bare) { + return Some(PartialSpec::Hosted(hosted)); + } + let colons_pos = bare.find(':')?; + let protocol = &bare[..colons_pos]; + let proto_lower = protocol.to_ascii_lowercase(); + let is_git_url = GIT_PROTOCOLS.contains(&proto_lower.as_str()) + || ((proto_lower == "http" || proto_lower == "https") && contains_dot_git_at_end(bare)); + if !is_git_url { + return None; + } + let corrected = correct_url(bare); + let parsed = reqwest::Url::parse(&corrected).ok()?; + let hash = parsed.fragment().filter(|f| !f.is_empty()).map(percent_decode_str); + let params = parse_git_params(hash.as_deref()); + Some(PartialSpec::Direct(HostedPackageSpec { + fetch_spec: url_to_fetch_spec(&parsed), + hosted: None, + normalized_bare_specifier: bare.to_string(), + git_committish: params.git_committish, + git_range: params.git_range, + path: params.path, + })) +} + +/// Check whether the input contains `.git` as a path suffix (`.git#` or +/// `.git` at end-of-string). Mirrors upstream's `/\.git(?:#|$)/` regex. +fn contains_dot_git_at_end(bare: &str) -> bool { + let mut iter = bare.match_indices(".git"); + iter.any(|(idx, _)| { + let after = &bare[idx + 4..]; + after.is_empty() || after.starts_with('#') + }) +} + +/// Strip the URL's fragment, format it, and drop the `git+` prefix +/// so the result is a plain transport URL. Mirrors upstream's +/// [`urlToFetchSpec`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L61-L68). +fn url_to_fetch_spec(parsed: &reqwest::Url) -> String { + let mut clone = parsed.clone(); + clone.set_fragment(None); + let formatted = clone.to_string(); + formatted.strip_prefix("git+").map(str::to_string).unwrap_or(formatted) +} + +/// Run upstream's +/// [`correctUrl`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L183-L201) +/// on the input. 
/// Run upstream's
/// [`correctUrl`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L183-L201)
/// on the input.
///
/// Only `ssh://` / `git+ssh://` inputs are ever changed: when the host
/// part of the authority ends in an SCP-style `:path` (a colon that is
/// not followed by a purely numeric port), that colon is rewritten to
/// `/` so `Url::parse` will accept the result. The `git+` prefix and
/// the `#fragment` are carried over verbatim. Every other input is
/// returned unchanged.
fn correct_url(input: &str) -> String {
    let (prefix, rest) = match input.strip_prefix("git+") {
        Some(rest) => ("git+", rest),
        None => ("", input),
    };
    let Some(after_scheme) = rest.strip_prefix("ssh://") else {
        return input.to_string();
    };

    // Peel the fragment off first so a `:` or `/` inside it can never
    // be mistaken for part of the authority.
    let (body, hash) = match after_scheme.find('#') {
        Some(idx) => after_scheme.split_at(idx),
        None => (after_scheme, ""),
    };
    // `path_tail` keeps its leading `/` (or is empty when there is no path).
    let (auth, path_tail) = match body.find('/') {
        Some(idx) => body.split_at(idx),
        None => (body, ""),
    };

    // The colon only counts when it sits in the host (after the last
    // `@`), not in the `user:password` part, and is not a numeric port.
    let host_start = auth.rfind('@').map_or(0, |at| at + 1);
    let scp_colon = auth.rfind(':').filter(|&colon| {
        let after = &auth[colon + 1..];
        let numeric_port = !after.is_empty() && after.chars().all(|ch| ch.is_ascii_digit());
        colon >= host_start && !numeric_port
    });

    match scp_colon {
        Some(colon) => format!(
            "{prefix}ssh://{}/{}{path_tail}{hash}",
            &auth[..colon],
            &auth[colon + 1..],
        ),
        None => input.to_string(),
    }
}

/// Fragment parameters extracted by [`parse_git_params`].
#[derive(Debug, Default)]
struct GitParsedParams {
    /// Parameter without a recognised prefix.
    git_committish: Option<String>,
    /// Value of the `semver:` parameter.
    git_range: Option<String>,
    /// Value of the `path:` parameter.
    path: Option<String>,
}

/// Split a URL fragment on `&` into committish / `semver:` range /
/// `path:` parts. `None` and the empty string both yield an all-`None`
/// result; when a kind of parameter repeats, the last one wins.
///
/// Mirrors upstream's
/// [`parseGitParams`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L162-L179).
fn parse_git_params(committish: Option<&str>) -> GitParsedParams {
    let mut parsed = GitParsedParams::default();
    let Some(fragment) = committish.filter(|fragment| !fragment.is_empty()) else {
        return parsed;
    };
    for param in fragment.split('&') {
        if let Some(range) = param.strip_prefix("semver:") {
            parsed.git_range = Some(range.to_string());
        } else if let Some(path) = param.strip_prefix("path:") {
            parsed.path = Some(path.to_string());
        } else {
            parsed.git_committish = Some(param.to_string());
        }
    }
    parsed
}
Mirrors upstream's +/// [`fromHostedGit`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/parseBareSpecifier.ts#L70-L132). +async fn from_hosted_git( + hosted: HostedGit, + probe: &Probe, +) -> HostedPackageSpec { + let mut fetch_spec: Option = None; + + let git_https_url = hosted.https(HostedGit::no_committish_no_git_plus()); + if let Some(ref https_url) = git_https_url + && probe.https_head_ok(https_url).await + && probe.ls_remote_exit_code(https_url).await + { + fetch_spec = Some(https_url.clone()); + } + + if fetch_spec.is_none() { + let ssh_url = hosted.ssh(HostedGit::no_committish()); + if let Some(ref url) = ssh_url + && probe.ls_remote_exit_code(url).await + { + fetch_spec = Some(url.clone()); + } + } + + if fetch_spec.is_none() + && let Some(https_url) = hosted.https(HostedGit::no_committish_no_git_plus()) + { + // Private repo or HEAD probe failed: try `https` (with auth if + // present) directly, gated on ls-remote reachability. + let has_auth = hosted.auth.is_some(); + let probe_succeeded = if has_auth || !probe.https_head_ok(&https_url).await { + probe.ls_remote_exit_code(&https_url).await + } else { + false + }; + if probe_succeeded { + let params = parse_git_params(hosted.committish.as_deref()); + return HostedPackageSpec { + fetch_spec: https_url.clone(), + hosted: Some(strip_committish(hosted)), + normalized_bare_specifier: format!("git+{https_url}"), + git_committish: params.git_committish, + git_range: params.git_range, + path: params.path, + }; + } + // Upstream tries an additional HEAD probe on the bare URL + // (no `.git` suffix) to confirm the path resolves at all + // before falling through to ssh. Pacquet mirrors this only + // when there's no `auth`: with auth, the path is the auth- + // gated private URL above. Without auth, retest as below. 
+ if !has_auth { + let stripped = https_url.strip_suffix(".git").unwrap_or(&https_url); + if probe.https_head_ok(stripped).await { + fetch_spec = Some(https_url.clone()); + } + } + } + + // Final fallback: `git+ssh` URL form. Matches upstream's + // `fetchSpec = hosted.sshurl({ noCommittish: true })`. + let fetch_spec = fetch_spec + .or_else(|| hosted.sshurl(HostedGit::no_committish())) + .unwrap_or_else(|| hosted.shortcut(HostedOpts::default())); + + let params = parse_git_params(hosted.committish.as_deref()); + HostedPackageSpec { + fetch_spec, + normalized_bare_specifier: hosted.shortcut(HostedOpts::default()), + hosted: Some(hosted), + git_committish: params.git_committish, + git_range: params.git_range, + path: params.path, + } +} + +fn strip_committish(mut hosted: HostedGit) -> HostedGit { + hosted.committish = None; + hosted +} + +fn percent_decode_str(input: &str) -> String { + // See [`crate::hosted_git`]'s `percent_decode` for the same UTF-8 + // reassembly rationale. + let mut buf: Vec = Vec::with_capacity(input.len()); + let bytes = input.as_bytes(); + let mut idx = 0; + while idx < bytes.len() { + if bytes[idx] == b'%' + && idx + 2 < bytes.len() + && let (Some(hi), Some(lo)) = + ((bytes[idx + 1] as char).to_digit(16), (bytes[idx + 2] as char).to_digit(16)) + { + buf.push((hi * 16 + lo) as u8); + idx += 3; + continue; + } + buf.push(bytes[idx]); + idx += 1; + } + String::from_utf8(buf).unwrap_or_else(|_| input.to_string()) +} + +#[cfg(test)] +mod tests { + use std::sync::Mutex; + + use super::{ + GitProbe, PartialSpec, ProbeFuture, correct_url, parse_bare_specifier, parse_git_params, + }; + + struct Fake { + head_ok: bool, + ls_ok: bool, + calls: Mutex>, + } + + impl GitProbe for Fake { + fn https_head_ok<'a>(&'a self, url: &'a str) -> ProbeFuture<'a> { + Box::pin(async move { + self.calls.lock().unwrap().push(format!("head {url}")); + self.head_ok + }) + } + fn ls_remote_exit_code<'a>(&'a self, repo: &'a str) -> ProbeFuture<'a> { + Box::pin(async move 
{ + self.calls.lock().unwrap().push(format!("ls {repo}")); + self.ls_ok + }) + } + } + + fn fake() -> Fake { + Fake { head_ok: true, ls_ok: true, calls: Mutex::new(Vec::new()) } + } + + #[test] + fn rejects_non_git_url() { + assert!(parse_bare_specifier("1.2.3").is_none()); + assert!(parse_bare_specifier("https://example.com/package.tar.gz").is_none()); + assert!(parse_bare_specifier("https://example.com/file").is_none()); + } + + #[test] + fn parses_github_shortcut_to_hosted() { + let kind = parse_bare_specifier("zkochan/is-negative#1.0.0").expect("hosted"); + assert!(matches!(kind, PartialSpec::Hosted(_))); + } + + #[test] + fn parses_plain_https_dot_git_to_direct() { + let kind = + parse_bare_specifier("https://gitea.osmocom.org/ttcn3/highlightjs-ttcn3.git#abc") + .expect("direct"); + match kind { + PartialSpec::Direct(spec) => { + assert_eq!( + spec.fetch_spec, + "https://gitea.osmocom.org/ttcn3/highlightjs-ttcn3.git", + ); + assert_eq!(spec.git_committish.as_deref(), Some("abc")); + } + _ => panic!("expected Direct"), + } + } + + #[test] + fn parse_git_params_splits_semver_path_committish() { + let p = parse_git_params(Some("semver:^1.0.0")); + assert_eq!(p.git_range.as_deref(), Some("^1.0.0")); + assert!(p.git_committish.is_none()); + + let p = parse_git_params(Some("path:/sub")); + assert_eq!(p.path.as_deref(), Some("/sub")); + + let p = parse_git_params(Some("beta&path:/packages/x")); + assert_eq!(p.git_committish.as_deref(), Some("beta")); + assert_eq!(p.path.as_deref(), Some("/packages/x")); + } + + #[test] + fn correct_url_rewrites_scp_style_colon() { + assert_eq!( + correct_url("ssh://username:password@example.com:repo.git"), + "ssh://username:password@example.com/repo.git", + ); + assert_eq!( + correct_url("git+ssh://username:password@example.com:repo.git"), + "git+ssh://username:password@example.com/repo.git", + ); + } + + #[test] + fn correct_url_keeps_numeric_port() { + assert_eq!( + 
correct_url("ssh://username:password@example.com:22/repo/@foo.git"), + "ssh://username:password@example.com:22/repo/@foo.git", + ); + } + + #[tokio::test] + async fn finalize_direct_returns_spec_unchanged() { + let kind = parse_bare_specifier("git+https://example.com/repo.git#abc").expect("direct"); + let probe = fake(); + let spec = kind.finalize(&probe).await; + assert_eq!(spec.fetch_spec, "https://example.com/repo.git"); + assert_eq!(spec.git_committish.as_deref(), Some("abc")); + // Direct spec shouldn't probe. + assert!(probe.calls.lock().unwrap().is_empty()); + } + + #[tokio::test] + async fn finalize_hosted_prefers_https_when_public() { + let kind = parse_bare_specifier("zkochan/is-negative").expect("hosted"); + let probe = fake(); + let spec = kind.finalize(&probe).await; + assert_eq!(spec.fetch_spec, "https://github.com/zkochan/is-negative.git"); + assert!(spec.hosted.is_some()); + } + + #[tokio::test] + async fn finalize_hosted_falls_back_to_ssh_when_private() { + let kind = parse_bare_specifier("foo/private-repo").expect("hosted"); + let probe = Fake { head_ok: false, ls_ok: false, calls: Mutex::new(Vec::new()) }; + let spec = kind.finalize(&probe).await; + assert_eq!(spec.fetch_spec, "git+ssh://git@github.com/foo/private-repo.git"); + } + + // Ported `parsePref.test.ts` SCP-style URL repair cases. Each row + // is `(input, expected_fetch_spec)`. 
+ #[tokio::test] + async fn fetch_spec_for_scp_style_inputs() { + let probe = fake(); + let cases: &[(&str, &str)] = &[ + ( + "ssh://username:password@example.com:repo.git", + "ssh://username:password@example.com/repo.git", + ), + ( + "ssh://username:password@example.com:repo/@foo.git", + "ssh://username:password@example.com/repo/@foo.git", + ), + ( + "ssh://username:password@example.com:22/repo/@foo.git", + "ssh://username:password@example.com:22/repo/@foo.git", + ), + ( + "ssh://username:password@example.com:22repo/@foo.git", + "ssh://username:password@example.com/22repo/@foo.git", + ), + ( + "ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b", + "ssh://username:password@example.com:22/repo/@foo.git", + ), + ( + "ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b&dev", + "ssh://username:password@example.com:22/repo/@foo.git", + ), + ( + "git+ssh://username:password@example.com:repo.git", + "ssh://username:password@example.com/repo.git", + ), + ( + "git+ssh://username:password@example.com:repo/@foo.git", + "ssh://username:password@example.com/repo/@foo.git", + ), + ( + "git+ssh://username:password@example.com:22/repo/@foo.git", + "ssh://username:password@example.com:22/repo/@foo.git", + ), + ( + "git+ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b", + "ssh://username:password@example.com:22/repo/@foo.git", + ), + ( + "git+ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b&dev", + "ssh://username:password@example.com:22/repo/@foo.git", + ), + ("git+https://github.com/pnpm/pnpm.git", "https://github.com/pnpm/pnpm.git"), + ( + "git+ssh://git@sub.domain.tld:internal-app/sub-path/service-name.git", + "ssh://git@sub.domain.tld/internal-app/sub-path/service-name.git", + ), + ]; + for (input, expected) in cases { + let kind = parse_bare_specifier(input).expect("parse claims input"); + let spec = kind.finalize(&probe).await; + assert_eq!( + spec.fetch_spec, + *expected, + "input {input}: expected fetch_spec 
{expected}, got {got}", + got = spec.fetch_spec, + ); + } + } + + // Ported `parsePref.test.ts` path-extraction cases. + #[tokio::test] + async fn path_extracted_from_scp_style_inputs() { + let probe = fake(); + let cases: &[(&str, Option<&str>)] = &[ + ("ssh://username:password@example.com:repo.git#path:/a/@b", Some("/a/@b")), + ("ssh://username:password@example.com:repo/@foo.git#path:/a/@b", Some("/a/@b")), + ("ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b", Some("/a/@b")), + ("ssh://username:password@example.com:22repo/@foo.git#path:/a/@b", Some("/a/@b")), + ("ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b&dev", Some("/a/@b")), + ("git+ssh://username:password@example.com:repo.git#path:/a/@b", Some("/a/@b")), + ("git+ssh://username:password@example.com:repo/@foo.git#path:/a/@b", Some("/a/@b")), + ("git+ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b", Some("/a/@b")), + ( + "git+ssh://username:password@example.com:22/repo/@foo.git#path:/a/@b&dev", + Some("/a/@b"), + ), + ("ssh://username:password@example.com:repo.git", None), + ("ssh://username:password@example.com:22/repo/@foo.git#dev", None), + ("git+ssh://username:password@example.com:repo.git", None), + ("git+ssh://username:password@example.com:22/repo/@foo.git#dev", None), + ]; + for (input, expected_path) in cases { + let kind = parse_bare_specifier(input).expect("parse claims input"); + let spec = kind.finalize(&probe).await; + assert_eq!(spec.path.as_deref(), *expected_path, "input {input}: path mismatch"); + } + } + + // Ported "plain http/https URLs ending in .git should be recognized" suite. 
+ #[tokio::test] + async fn plain_http_dot_git_recognized() { + let probe = fake(); + let cases: &[(&str, &str)] = &[ + ( + "https://gitea.osmocom.org/ttcn3/highlightjs-ttcn3.git", + "https://gitea.osmocom.org/ttcn3/highlightjs-ttcn3.git", + ), + ( + "https://gitea.osmocom.org/ttcn3/highlightjs-ttcn3.git#6daccff309fca1e7561a43984d42fa4f829ce06d", + "https://gitea.osmocom.org/ttcn3/highlightjs-ttcn3.git", + ), + ("http://example.com/repo.git", "http://example.com/repo.git"), + ("http://example.com/repo.git#main", "http://example.com/repo.git"), + ]; + for (input, expected) in cases { + let kind = parse_bare_specifier(input).expect("claim"); + let spec = kind.finalize(&probe).await; + assert_eq!(spec.fetch_spec, *expected, "input {input}"); + } + } + + #[test] + fn plain_http_non_dot_git_declined() { + for input in [ + "https://example.com/package.tar.gz", + "https://example.com/package.tgz", + "https://example.com/file", + ] { + assert!(parse_bare_specifier(input).is_none(), "input {input}"); + } + } +} diff --git a/pacquet/crates/resolving-git-resolver/src/resolve_ref.rs b/pacquet/crates/resolving-git-resolver/src/resolve_ref.rs new file mode 100644 index 0000000000..297b52f032 --- /dev/null +++ b/pacquet/crates/resolving-git-resolver/src/resolve_ref.rs @@ -0,0 +1,419 @@ +//! Ports the ref-resolution helpers from pnpm's +//! [`index.ts`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/index.ts#L116-L200): +//! `resolveRef` / `getRepoRefs` / `resolveRefFromRefs` / `resolveVTags`, +//! plus the [`GitCommandRunner`] capability seam the production runner +//! plugs into. + +use std::{ + collections::{BTreeSet, HashMap}, + future::Future, + pin::Pin, +}; + +use derive_more::{Display, Error}; +use miette::Diagnostic; +use node_semver::{Range, Version}; + +/// Capability seam for `git ls-remote`. 
+/// +/// Real installs supply an impl that shells out to the system `git` +/// binary via `tokio::process::Command`; tests supply a fake that +/// returns canned stdout for a given (repo, args) pair. +pub trait GitCommandRunner: Send + Sync { + /// Invoke `git ls-remote [ ^{}]` (or + /// `git ls-remote ` when `ref_` is `None`) and return the + /// captured stdout on success. Match upstream's `graceful-git` + /// retry-of-one behaviour (one attempt + one retry, total two + /// attempts at most). + fn ls_remote<'a>( + &'a self, + repo: &'a str, + ref_: Option<&'a str>, + ) -> Pin> + Send + 'a>>; +} + +/// Error from a [`GitCommandRunner::ls_remote`] invocation. Returned +/// verbatim through [`GitResolveRefError::Runner`]. +#[derive(Debug, Display, Error, Diagnostic)] +#[display("git ls-remote failed: {message}")] +#[diagnostic(code(ERR_PNPM_GIT_LS_REMOTE_FAILED))] +pub struct GitRunError { + pub message: String, +} + +/// Errors raised by [`resolve_ref`]. +#[derive(Debug, Display, Error, Diagnostic)] +pub enum GitResolveRefError { + /// `git ls-remote` failed. + #[display("{_0}")] + Runner(#[error(source)] GitRunError), + + /// Mirrors upstream's `ERR_PNPM_GIT_AMBIGUOUS_REF`. Raised when a + /// partial commit reference resolves to a commit whose hash does + /// not start with the reference (the resolver picked a branch / + /// tag whose tip happened to match the prefix, which is the + /// scenario the original `PnpmError` was added for). + #[display("resolved commit {commit} from commit-ish reference {ref_}")] + #[diagnostic(code(ERR_PNPM_GIT_AMBIGUOUS_REF))] + AmbiguousRef { + #[error(not(source))] + ref_: String, + #[error(not(source))] + commit: String, + }, + + /// Mirrors upstream's plain `Could not resolve to a commit + /// of .` error. 
+ #[display("Could not resolve {ref_} to a commit of {repo}.")] + UnknownRef { + #[error(not(source))] + ref_: String, + #[error(not(source))] + repo: String, + }, + + /// Mirrors upstream's `Could not resolve to a commit of + /// . Available versions are: , ` error. + #[display( + "Could not resolve {range} to a commit of {repo}. Available versions are: {available}" + )] + UnknownRange { + #[error(not(source))] + range: String, + #[error(not(source))] + repo: String, + #[error(not(source))] + available: String, + }, +} + +/// Pin a git reference to a commit SHA. +/// +/// Mirrors upstream's +/// [`resolveRef`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/resolving/git-resolver/src/index.ts#L138-L149). +/// +/// * Full 40-char hex commit → return as-is, no network round-trip. +/// * Partial hex commit (7-40 chars, no range) → query `ls-remote` +/// with no ref filter, then search ref tips for a single matching +/// prefix. Surface [`GitResolveRefError::AmbiguousRef`] when the +/// matched commit does not start with the partial hash. +/// * Branch / tag (no range) → query `ls-remote ^{}` and +/// look up the resolved SHA in a fixed precedence order. +/// * Semver range (`#semver:`) → query `ls-remote` with no +/// ref filter, filter tags to those matching upstream's +/// `^refs/tags/v?\d+\.\d+\.\d+(?:[-+].+)?(?:\^\{\})?$` shape, run +/// `maxSatisfying`, look up the chosen tag. +pub async fn resolve_ref( + runner: &Runner, + repo: &str, + ref_: &str, + range: Option<&str>, +) -> Result { + let committish = is_committish(ref_); + if committish && ref_.len() == 40 { + return Ok(ref_.to_string()); + } + // Upstream passes `null` for `ref` when either `range` is set or + // the ref looks like a committish (we don't have a single + // canonical ref name to filter on). Mirror that. 
/// Reports whether `ref_` has the shape of a (possibly abbreviated)
/// commit hash: 7 to 40 characters, each a lowercase hex digit.
/// Same shape as upstream's `ref.match(/^[0-9a-f]{7,40}$/)`.
fn is_committish(ref_: &str) -> bool {
    (7..=40).contains(&ref_.len())
        && ref_.bytes().all(|byte| byte.is_ascii_digit() || (b'a'..=b'f').contains(&byte))
}

/// Builds a `{ ref_name -> commit_sha }` map from `git ls-remote` stdout.
///
/// Each `\n`-separated row is split at its first tab; rows without a
/// tab (including blank ones) are ignored. When a ref name appears
/// more than once, the last row wins.
fn parse_ls_remote(stdout: &str) -> HashMap<String, String> {
    stdout
        .split('\n')
        .filter_map(|row| row.split_once('\t'))
        .map(|(sha, name)| (name.to_string(), sha.to_string()))
        .collect()
}
/// `true` when `key` matches upstream's
/// `^refs/tags/v?\d+\.\d+\.\d+(?:[-+].+)?(?:\^\{\})?$` tag filter.
///
/// The whole key must match: a numeric `major.minor.patch` core, then
/// either nothing or a `-` / `+` followed by at least one character,
/// then an optional `^{}` dereference marker. Keys such as
/// `refs/tags/1.0.0abc` or `refs/tags/1.0.0.` are rejected here rather
/// than left for the semver parser to sort out.
fn looks_like_version_tag(key: &str) -> bool {
    let Some(tag) = key.strip_prefix("refs/tags/") else { return false };
    // The `^{}` marker is optional in the pattern, so accept the tag
    // either as written or with exactly one trailing marker removed.
    is_version_shaped(tag) || tag.strip_suffix("^{}").is_some_and(is_version_shaped)
}

/// Matches `v?\d+\.\d+\.\d+(?:[-+].+)?` against the whole of `tag`.
fn is_version_shaped(tag: &str) -> bool {
    let mut rest = tag.strip_prefix('v').unwrap_or(tag);
    for group in 0..3 {
        // Groups after the first must be introduced by a literal dot.
        if group > 0 {
            let Some(after_dot) = rest.strip_prefix('.') else { return false };
            rest = after_dot;
        }
        let digits = rest.bytes().take_while(u8::is_ascii_digit).count();
        if digits == 0 {
            return false;
        }
        // Only ASCII digits were counted, so this is a char boundary.
        rest = &rest[digits..];
    }
    // After the numeric core: nothing, or `-`/`+` plus a non-empty tail.
    match rest.strip_prefix(|sign: char| matches!(sign, '-' | '+')) {
        Some(tail) => !tail.is_empty(),
        None => rest.is_empty(),
    }
}
"https://example.com/repo.git", + "163360a8d3ae6bee9524541043197ff356f8ed99", + None, + ) + .await + .expect("resolved"); + assert_eq!(commit, "163360a8d3ae6bee9524541043197ff356f8ed99"); + assert!(stub.last_args.lock().unwrap().is_empty(), "no ls-remote for full commit"); + } + + #[tokio::test] + async fn branch_lookup_uses_refs_heads() { + let stub = stub("4c39fbc124cd4944ee51cb082ad49320fab58121\trefs/heads/canary\n"); + let commit = + resolve_ref(&stub, "https://example.com/repo.git", "canary", None).await.unwrap(); + assert_eq!(commit, "4c39fbc124cd4944ee51cb082ad49320fab58121"); + } + + #[tokio::test] + async fn annotated_tag_prefers_dereferenced_commit() { + let stub = stub(concat!( + "deadbeef00000000000000000000000000000000\trefs/tags/v1.0.0\n", + "6dcce91c268805d456b8a575b67d7febc7ae2933\trefs/tags/v1.0.0^{}\n", + )); + let commit = resolve_ref(&stub, "repo", "v1.0.0", None).await.unwrap(); + assert_eq!(commit, "6dcce91c268805d456b8a575b67d7febc7ae2933"); + } + + #[tokio::test] + async fn partial_commit_ambiguous_branch_raises() { + let stub = stub("0000000000000000000000000000000000000000\trefs/heads/main\n"); + let err = resolve_ref(&stub, "repo", "deadbeef", None).await.expect_err("ambiguous"); + match err { + GitResolveRefError::UnknownRef { .. } => {} + other => panic!("expected UnknownRef, got {other:?}"), + } + } + + #[tokio::test] + async fn partial_commit_matches_single_ref() { + let stub = stub("deadbeef1234567890123456789012345678abcd\trefs/heads/feat\n"); + let commit = resolve_ref(&stub, "repo", "deadbeef", None).await.unwrap(); + assert_eq!(commit, "deadbeef1234567890123456789012345678abcd"); + } + + #[tokio::test] + async fn ambiguous_partial_commit_mismatch_errors() { + // Single ref tip starts with `deadbe` but not `deadbf` → + // resolves to the matching commit, then trips the + // partial-prefix sanity check (matched commit does not start + // with `deadbf`). 
+ let stub = stub("deadbeef1234567890123456789012345678abcd\trefs/heads/x\n"); + let err = resolve_ref(&stub, "repo", "deadbf12", None).await.expect_err("ambig"); + // First the lookup falls through (no exact ref match), then + // partial-commit search finds zero matches → UnknownRef. + assert!(matches!(err, GitResolveRefError::UnknownRef { .. })); + } + + #[tokio::test] + async fn semver_range_picks_max_satisfying() { + let stub = stub(concat!( + "0000000000000000000000000000000000000000\tHEAD\n", + "ed3de20970d980cf21a07fd8b8732c70d5182303\trefs/tags/v0.0.38\n", + "cba04669e621b85fbdb33371604de1a2898e68e9\trefs/tags/v0.0.39\n", + )); + let commit = resolve_ref(&stub, "repo", "HEAD", Some("~0.0.38")).await.unwrap(); + assert_eq!(commit, "cba04669e621b85fbdb33371604de1a2898e68e9"); + } + + #[tokio::test] + async fn semver_no_match_lists_available_versions() { + let stub = stub(concat!( + "aaaa\trefs/tags/v1.0.0\n", + "bbbb\trefs/tags/v1.0.1\n", + "cccc\trefs/tags/v2.0.0\n", + )); + let err = resolve_ref(&stub, "repo", "HEAD", Some("^100.0.0")).await.expect_err("err"); + match err { + GitResolveRefError::UnknownRange { available, .. 
} => { + assert!(available.contains("v1.0.0")); + assert!(available.contains("v2.0.0")); + } + other => panic!("expected UnknownRange, got {other:?}"), + } + } + + #[test] + fn version_tag_regex() { + assert!(looks_like_version_tag("refs/tags/1.0.0")); + assert!(looks_like_version_tag("refs/tags/v1.0.0")); + assert!(looks_like_version_tag("refs/tags/v1.0.0-beta.1")); + assert!(looks_like_version_tag("refs/tags/1.0.0^{}")); + assert!(!looks_like_version_tag("refs/tags/release")); + assert!(!looks_like_version_tag("refs/heads/main")); + } + + #[test] + fn parse_ls_remote_ignores_blank_lines() { + let refs = parse_ls_remote("abc\trefs/heads/main\n\n"); + assert_eq!(refs.len(), 1); + assert_eq!(refs.get("refs/heads/main").map(String::as_str), Some("abc")); + } +} diff --git a/pacquet/crates/resolving-git-resolver/src/runners.rs b/pacquet/crates/resolving-git-resolver/src/runners.rs new file mode 100644 index 0000000000..60f5c6201f --- /dev/null +++ b/pacquet/crates/resolving-git-resolver/src/runners.rs @@ -0,0 +1,148 @@ +//! Production [`GitProbe`] and [`GitCommandRunner`] implementations. +//! +//! Pulled out from `git_resolver.rs` to keep the public API free of +//! the runner concrete types: callers get either the production +//! pair (real network + real `git` binary) or supply their own +//! ports of the traits in tests. + +use std::{future::Future, path::PathBuf, pin::Pin, process::Stdio, sync::Arc}; + +use pacquet_network::ThrottledClient; + +use crate::{ + parse_bare_specifier::{GitProbe, ProbeFuture}, + resolve_ref::{GitCommandRunner, GitRunError}, +}; + +/// Production [`GitProbe`]. +/// +/// `https_head_ok` issues an HTTP HEAD via the install-wide +/// [`ThrottledClient`] (so concurrency-throttling, proxy, TLS, and +/// per-registry config all apply). `ls_remote_exit_code` shells out +/// to the system `git` binary. +/// +/// `git_bin` overrides the binary path; production callers leave it +/// `None` and the runner resolves `git` through `PATH`. 
+pub struct RealGitProbe { + pub http_client: Arc, + pub git_bin: Option, +} + +impl RealGitProbe { + pub fn new(http_client: Arc) -> Self { + Self { http_client, git_bin: None } + } +} + +impl GitProbe for RealGitProbe { + fn https_head_ok<'a>(&'a self, url: &'a str) -> ProbeFuture<'a> { + Box::pin(async move { + // Match upstream's `replace(/\.git$/, '')` strip before + // issuing HEAD — host endpoints serve the human page on + // the path without `.git`, but reject HEAD on the `.git` + // alias on some configurations. + let stripped: &str = url.strip_suffix(".git").unwrap_or(url); + let guard = self.http_client.acquire().await; + let response = guard.head(stripped).send().await; + match response { + Ok(resp) => resp.status().is_success(), + Err(_) => false, + } + }) + } + + fn ls_remote_exit_code<'a>(&'a self, repo: &'a str) -> ProbeFuture<'a> { + Box::pin(async move { + let bin = self.git_bin.as_deref().map(std::path::Path::to_path_buf); + let repo_owned = repo.to_string(); + tokio::task::spawn_blocking(move || { + let mut cmd = match bin { + Some(b) => std::process::Command::new(b), + None => std::process::Command::new("git"), + }; + cmd.args(["ls-remote", "--exit-code", &repo_owned, "HEAD"]); + cmd.stdout(Stdio::null()).stderr(Stdio::null()).stdin(Stdio::null()); + cmd.status().map(|s| s.success()).unwrap_or(false) + }) + .await + .unwrap_or(false) + }) + } +} + +/// Production [`GitCommandRunner`]. +/// +/// Shells out to `git ls-remote [ ^{}]` via +/// `tokio::task::spawn_blocking` (the system git CLI is synchronous, +/// and the rest of pacquet keeps the async runtime free of blocking +/// work). +/// +/// Mirrors upstream's `graceful-git` "one retry" policy at one extra +/// attempt on transient failure. 
+pub struct RealGitRunner { + pub git_bin: Option, +} + +impl RealGitRunner { + pub fn new() -> Self { + Self { git_bin: None } + } +} + +impl Default for RealGitRunner { + fn default() -> Self { + Self::new() + } +} + +impl GitCommandRunner for RealGitRunner { + fn ls_remote<'a>( + &'a self, + repo: &'a str, + ref_: Option<&'a str>, + ) -> Pin> + Send + 'a>> { + let bin = self.git_bin.as_deref().map(std::path::Path::to_path_buf); + let repo_owned = repo.to_string(); + let ref_owned = ref_.map(str::to_string); + Box::pin(async move { + tokio::task::spawn_blocking(move || run_ls_remote_blocking(bin, repo_owned, ref_owned)) + .await + .map_err(|err| GitRunError { message: format!("ls-remote task panicked: {err}") })? + }) + } +} + +fn run_ls_remote_blocking( + bin: Option, + repo: String, + ref_: Option, +) -> Result { + let attempts = 2; // matches upstream `graceful-git` retries: 1 + let mut last_err: Option = None; + for _ in 0..attempts { + let mut cmd = match bin.as_ref() { + Some(b) => std::process::Command::new(b), + None => std::process::Command::new("git"), + }; + cmd.arg("ls-remote").arg(&repo); + if let Some(r) = ref_.as_deref() { + cmd.arg(r); + cmd.arg(format!("{r}^{{}}")); + } + let output = cmd.output(); + match output { + Ok(out) if out.status.success() => { + return Ok(String::from_utf8_lossy(&out.stdout).into_owned()); + } + Ok(out) => { + last_err = Some(String::from_utf8_lossy(&out.stderr).into_owned()); + } + Err(err) => { + last_err = Some(err.to_string()); + } + } + } + Err(GitRunError { + message: last_err.unwrap_or_else(|| "ls-remote failed with unknown error".to_string()), + }) +} diff --git a/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs b/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs index 27242a51d7..904fdca7c0 100644 --- a/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs +++ b/pacquet/crates/resolving-npm-resolver/src/npm_resolver.rs @@ -343,7 +343,8 @@ fn build_resolve_result( ) -> Result { let pkg_name 
= PkgName::parse(picked.name.as_str()).map_err(|err| Box::new(err) as ResolveError)?; - let id = PkgNameVer::new(pkg_name.clone(), picked.version.clone()); + let name_ver = PkgNameVer::new(pkg_name.clone(), picked.version.clone()); + let id = (&name_ver).into(); // The picker always carries a tarball URL on its `dist` payload — // every npm registry serves `dist.tarball` on a successful pick // and pacquet's deserializer requires it (`dist.tarball: String`, @@ -370,6 +371,7 @@ fn build_resolve_result( ); Ok(ResolveResult { id, + name_ver: Some(name_ver), latest: meta.dist_tag("latest").map(str::to_string), published_at, manifest, diff --git a/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs b/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs index a724c064dd..9b6a2b5c60 100644 --- a/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs +++ b/pacquet/crates/resolving-npm-resolver/src/npm_resolver/tests.rs @@ -115,8 +115,10 @@ async fn range_specifier_picks_max_in_range() { ..WantedDependency::default() }; let result = resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().unwrap(); - assert_eq!(result.id.name.to_string(), "acme"); - assert_eq!(result.id.suffix.to_string(), "1.1.0"); + let name_ver = result.name_ver.as_ref().expect("npm resolver fills name_ver"); + assert_eq!(name_ver.name.to_string(), "acme"); + assert_eq!(name_ver.suffix.to_string(), "1.1.0"); + assert_eq!(result.id.as_str(), "acme@1.1.0"); assert_eq!(result.latest.as_deref(), Some("1.1.0")); assert_eq!(result.resolved_via, "npm-registry"); assert_eq!(result.alias.as_deref(), Some("acme")); @@ -150,7 +152,7 @@ async fn missing_bare_specifier_synthesizes_default_tag_query() { let wanted = WantedDependency { alias: Some("acme".to_string()), ..WantedDependency::default() }; let result = resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().unwrap(); - assert_eq!(result.id.suffix.to_string(), "1.1.0"); + 
assert_eq!(result.name_ver.as_ref().expect("name_ver").suffix.to_string(), "1.1.0"); } #[tokio::test] @@ -248,8 +250,9 @@ async fn jsr_specifier_routes_through_jsr_registry() { ..WantedDependency::default() }; let result = resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().unwrap(); - assert_eq!(result.id.name.to_string(), "@jsr/foo__bar"); - assert_eq!(result.id.suffix.to_string(), "1.1.0"); + let name_ver = result.name_ver.as_ref().expect("npm resolver fills name_ver"); + assert_eq!(name_ver.name.to_string(), "@jsr/foo__bar"); + assert_eq!(name_ver.suffix.to_string(), "1.1.0"); assert_eq!(result.resolved_via, "jsr-registry"); assert_eq!(result.alias.as_deref(), Some("@foo/bar")); assert_eq!(result.latest.as_deref(), Some("1.1.0")); @@ -277,7 +280,10 @@ async fn jsr_specifier_without_selector_uses_default_tag() { ..WantedDependency::default() }; let result = resolver.resolve(&wanted, &ResolveOptions::default()).await.unwrap().unwrap(); - assert_eq!(result.id.suffix.to_string(), "1.1.0"); + assert_eq!( + result.name_ver.as_ref().expect("npm resolver fills name_ver").suffix.to_string(), + "1.1.0", + ); assert_eq!(result.resolved_via, "jsr-registry"); } diff --git a/pacquet/crates/resolving-resolver-base/Cargo.toml b/pacquet/crates/resolving-resolver-base/Cargo.toml index e4f23ba205..f0a7ae13e4 100644 --- a/pacquet/crates/resolving-resolver-base/Cargo.toml +++ b/pacquet/crates/resolving-resolver-base/Cargo.toml @@ -14,9 +14,10 @@ repository.workspace = true pacquet-config = { workspace = true } pacquet-lockfile = { workspace = true } -chrono = { workspace = true } -serde = { workspace = true } -serde_json = { workspace = true } +chrono = { workspace = true } +derive_more = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } [dev-dependencies] ssri = { workspace = true } diff --git a/pacquet/crates/resolving-resolver-base/src/lib.rs b/pacquet/crates/resolving-resolver-base/src/lib.rs index 4393a7969f..81d580b11b 
100644 --- a/pacquet/crates/resolving-resolver-base/src/lib.rs +++ b/pacquet/crates/resolving-resolver-base/src/lib.rs @@ -27,10 +27,10 @@ mod verifier; pub use resolve::{ DIRECT_DEP_SELECTOR_WEIGHT, DependencyManifest, EXISTING_VERSION_SELECTOR_WEIGHT, LatestInfo, - LatestQuery, PreferredVersions, ResolveError, ResolveFuture, ResolveLatestFuture, - ResolveOptions, ResolveResult, Resolver, UpdateBehavior, VersionSelectorEntry, - VersionSelectorType, VersionSelectorWithWeight, VersionSelectors, WantedDependency, - WorkspacePackage, WorkspacePackages, WorkspacePackagesByVersion, + LatestQuery, PkgResolutionId, PreferredVersions, ResolveError, ResolveFuture, + ResolveLatestFuture, ResolveOptions, ResolveResult, Resolver, UpdateBehavior, + VersionSelectorEntry, VersionSelectorType, VersionSelectorWithWeight, VersionSelectors, + WantedDependency, WorkspacePackage, WorkspacePackages, WorkspacePackagesByVersion, }; pub use verifier::{ ResolutionPolicyViolation, ResolutionVerification, ResolutionVerifier, VerifyCtx, VerifyFuture, diff --git a/pacquet/crates/resolving-resolver-base/src/resolve.rs b/pacquet/crates/resolving-resolver-base/src/resolve.rs index 065b6214e6..591f5ec606 100644 --- a/pacquet/crates/resolving-resolver-base/src/resolve.rs +++ b/pacquet/crates/resolving-resolver-base/src/resolve.rs @@ -11,12 +11,57 @@ use std::{collections::BTreeMap, future::Future, path::PathBuf, pin::Pin}; use chrono::{DateTime, Utc}; +use derive_more::{Display, From}; use pacquet_config::version_policy::PackageVersionPolicy; use pacquet_lockfile::{LockfileResolution, PkgNameVer}; use serde::{Deserialize, Serialize}; use crate::verifier::ResolutionPolicyViolation; +/// Branded resolution identifier the resolver chain emits on every +/// successful pick. Mirrors pnpm's +/// [`PkgResolutionId`](https://github.com/pnpm/pnpm/blob/ef87f3ccff/core/types/src/misc.ts#L59) +/// — a phantom-typed string with no runtime validator. 
+/// +/// Two shapes appear in the wild: +/// * `name@version` from the npm-registry resolver. +/// * URL-shaped (`git+https://…#sha`, `https://codeload.github.com/…/tar.gz/sha`, +/// `file:…`) from the git / local / tarball resolvers. +/// +/// Consumers that need the structured `name@version` form read +/// [`ResolveResult::name_ver`] instead. +#[derive(Debug, Display, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, From)] +#[serde(transparent)] +pub struct PkgResolutionId(String); + +impl PkgResolutionId { + pub fn as_str(&self) -> &str { + &self.0 + } + + pub fn into_inner(self) -> String { + self.0 + } +} + +impl From<&str> for PkgResolutionId { + fn from(value: &str) -> Self { + PkgResolutionId(value.to_string()) + } +} + +impl From<&PkgNameVer> for PkgResolutionId { + fn from(value: &PkgNameVer) -> Self { + PkgResolutionId(value.to_string()) + } +} + +impl From for PkgResolutionId { + fn from(value: PkgNameVer) -> Self { + PkgResolutionId(value.to_string()) + } +} + /// An entry from a project's manifest that the resolver chain will /// route to a concrete protocol. Mirrors pnpm's /// [`WantedDependency`](https://github.com/pnpm/pnpm/blob/3687b0e180/resolving/resolver-base/src/index.ts#L304-L313). @@ -189,11 +234,17 @@ pub type DependencyManifest = serde_json::Value; /// [`ResolveResult`](https://github.com/pnpm/pnpm/blob/3687b0e180/resolving/resolver-base/src/index.ts#L212-L237). #[derive(Debug, Clone, PartialEq)] pub struct ResolveResult { - /// Branded `{name}@{version}` identifier upstream calls - /// `PkgResolutionId`. Pacquet reuses - /// [`pacquet_lockfile::PkgNameVer`], which already pins the same - /// shape used elsewhere in the codebase. - pub id: PkgNameVer, + /// Branded resolution identifier — see [`PkgResolutionId`]. + pub id: PkgResolutionId, + /// Structured `name@version` when the resolver knows both at + /// resolve time. 
The npm-registry resolver always fills this; + /// resolvers that learn the package name from the manifest only + /// after the fetch (git / tarball / local) leave it `None` and + /// downstream consumers (virtual-store layout, dedupe keys) must + /// fall back to reading the manifest. Mirrors the upstream + /// pattern where `result.manifest.name` and `result.manifest.version` + /// are the canonical name/version sources for non-npm resolutions. + pub name_ver: Option, /// `latest` tag at the moment of resolution. Filled by the npm /// resolver; absent for protocols that have no notion of latest /// (git, file, link, …). diff --git a/pacquet/crates/resolving-resolver-base/src/tests.rs b/pacquet/crates/resolving-resolver-base/src/tests.rs index 9d1f02652b..cc66669abf 100644 --- a/pacquet/crates/resolving-resolver-base/src/tests.rs +++ b/pacquet/crates/resolving-resolver-base/src/tests.rs @@ -134,9 +134,10 @@ impl Resolver for StubResolver { if !alias.starts_with("claim:") { return Ok(None); } - let id: PkgNameVer = "lodash@4.17.21".parse().expect("parse fake PkgNameVer"); + let name_ver: PkgNameVer = "lodash@4.17.21".parse().expect("parse fake PkgNameVer"); Ok(Some(ResolveResult { - id, + id: (&name_ver).into(), + name_ver: Some(name_ver), latest: None, published_at: None, manifest: None,