From 0cefccf15839bd69dc7a3db2c8adaa85bb32c87e Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Thu, 28 May 2026 01:30:42 +0200 Subject: [PATCH] feat(registry): persist pnpr users and tokens to disk (#11977) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(registry): persist pnpr users and tokens to disk Backs UserStore with a verdaccio-shaped htpasswd file (bcrypt $2y$ hashes, atomically rewritten on every adduser) and TokenStore with a SQLite database that stores SHA-256 token hashes plus the per-record fields the upcoming /-/npm/v1/tokens surface will need (created_at, last_used_at, readonly, cidr_whitelist). Configuration mirrors verdaccio's auth.htpasswd.{file,max_users} under the existing YAML schema; tokens default to a tokens.db sibling of htpasswd, overridable via auth.tokens.file. max_users=-1 disables registration end-to-end. Both files are written via tmp+rename and loaded eagerly on startup so a malformed htpasswd fails fast rather than booting with a silent empty user list. Closes #11974. * fix(registry): use OS CSPRNG, satisfy dylint + rustdoc - TokenStore's per-process secret now comes from getrandom (OS-backed CSPRNG) instead of time/pid/stack address. Tokens are derived from this secret + a per-issue nonce, so weak entropy was making mint outputs guessable to an attacker who could bound those inputs. - Reorder derives on AuthConfig / HtpasswdConfig / TokensConfig / MaxUsers to satisfy perfectionist::derive-ordering (prefix-then- alphabetical: Debug, Default first, then the rest). - Re-export auth::identify so the rustdoc link from the now-public UserStore::verify resolves; rustdoc::private-intra-doc-links no longer fails the workspace doc build. - Drop the inaccurate "+inf" mention from MaxUsers' doc — serde-saphyr treats +inf as a float and can't deserialize it into i64, so the only way to get Unlimited is to omit max_users. --- Cargo.lock | 51 ++ Cargo.toml | 2 + registry/crates/pnpm-registry/Cargo.toml | 3 + registry/crates/pnpm-registry/src/auth.rs | 789 ++++++++++++++---- registry/crates/pnpm-registry/src/config.rs | 193 +++++ registry/crates/pnpm-registry/src/error.rs | 44 + registry/crates/pnpm-registry/src/lib.rs | 8 +- registry/crates/pnpm-registry/src/server.rs | 48 +- .../pnpm-registry/tests/auth_persistence.rs | 230 +++++ 9 files changed, 1198 insertions(+), 170 deletions(-) create mode 100644 registry/crates/pnpm-registry/tests/auth_persistence.rs diff --git a/Cargo.lock b/Cargo.lock index 9e3157f96d..889ca75e9a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -301,6 +301,19 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bcrypt" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abaf6da45c74385272ddf00e1ac074c7d8a6c1a1dda376902bd6a427522a8b2c" +dependencies = [ + "base64 0.22.1", + "blowfish", + "getrandom 0.3.4", + "subtle", + "zeroize", +] + [[package]] name = "bitflags" version = "2.11.1" @@ -316,6 +329,16 @@ dependencies = [ "generic-array", ] +[[package]] +name = "blowfish" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e412e2cd0f2b2d93e02543ceae7917b3c70331573df19ee046bcbc35e45e87d7" +dependencies = [ + "byteorder", + "cipher", +] + [[package]] name = "bstr" version = "1.12.1" @@ -339,6 +362,12 @@ version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.11.1" @@ -413,6 +442,16 @@ dependencies = [ "half", ] +[[package]] +name = "cipher" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" +dependencies = [ + "crypto-common", + "inout", +] + [[package]] name = "clap" version = "4.6.1" @@ -1457,6 +1496,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "inout" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879f10e63c20629ecabbb64a8010319738c66a5cd0c29b02d63d272b03751d01" +dependencies = [ + "generic-array", +] + [[package]] name = "insta" version = "1.47.2" @@ -2990,15 +3038,18 @@ version = "0.0.1" dependencies = [ "axum", "base64 0.22.1", + "bcrypt", "clap", "derive_more", "futures-util", + "getrandom 0.3.4", "indexmap", "miette 7.6.0", "mockito", "pacquet-network", "pipe-trait", "reqwest", + "rusqlite", "serde", "serde-saphyr", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 028c30dd4d..d604e255f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,6 +82,7 @@ axum = { version = "0.8.7", default-features = false, features = [ clap = { version = "4", features = ["derive", "string"] } command-extra = { version = "1.0.0" } base64 = { version = "0.22.1" } +bcrypt = { version = "0.17.1" } chrono = { version = "0.4.44", default-features = false, features = ["clock"] } dashmap = { version = "6.1.0" } derive_more = { version = "2.1.1", features = ["full"] } @@ -95,6 +96,7 @@ insta = { version = "1.47.2", features = ["yaml", "glob", "walkdir"] } itertools = { version = "0.14.0" } futures-util = { version = "0.3.32" } gethostname = { version = "1" } +getrandom = { version = "0.3.4" } miette = { version = "7.6.0", features = ["fancy"] } num_cpus = { version = "1.17.0" } os_display = { version = "0.1.4" } diff --git a/registry/crates/pnpm-registry/Cargo.toml b/registry/crates/pnpm-registry/Cargo.toml index 1eb611764c..f87070c479 100644 --- a/registry/crates/pnpm-registry/Cargo.toml +++ b/registry/crates/pnpm-registry/Cargo.toml @@ -22,12 +22,15 @@ path = "src/main.rs" pacquet-network = { workspace = true } axum = { workspace = true } base64 = { workspace = true } +bcrypt = { workspace = true } clap = { workspace = true } derive_more = { workspace = true } futures-util = { workspace = true } +getrandom = { workspace = true } indexmap = { workspace = true } miette = { workspace = true } reqwest = { workspace = true } +rusqlite = { workspace = true } serde = { workspace = true } serde-saphyr = { workspace = true } serde_json = { workspace = true } diff --git a/registry/crates/pnpm-registry/src/auth.rs b/registry/crates/pnpm-registry/src/auth.rs index 9ce84f59b5..0b105e4644 100644 --- a/registry/crates/pnpm-registry/src/auth.rs +++ b/registry/crates/pnpm-registry/src/auth.rs @@ -1,82 +1,195 @@ //! User and token storage for the registry. //! -//! The pnpm tests that use `@pnpm/registry-mock` boot the registry, -//! call `addUser` once, and then exercise the resulting Bearer token -//! against protected packages. There's no need for password hashing, -//! token expiration, or on-disk persistence — everything lives in an -//! in-memory store guarded by a `Mutex`. +//! Two stores back the auth flow: //! -//! Two pieces matter: +//! * [`UserStore`] — username → bcrypt-hashed password. Persisted as +//! an Apache-style htpasswd file when [`UserStore::open`] is given +//! a path; in-memory otherwise. The on-disk format is one +//! `:` line per user, so the same file can +//! be inspected and verified by Apache's `htpasswd -v`. +//! * [`TokenStore`] — SHA-256 token hash → token record. Persisted in +//! a SQLite database when [`TokenStore::open`] is given a path; +//! in-memory otherwise. The raw token is only returned to the +//! caller once on `issue`; only its hash ever hits disk so a leak +//! of the database doesn't grant access on its own. //! -//! * [`UserStore`] — username → plaintext password. Verified via -//! constant-time compare to guard against test-timing weirdness -//! (overkill but cheap, since `subtle` isn't in the workspace and -//! we can write it ourselves). -//! * [`TokenStore`] — token → username. Tokens are 32-hex-char -//! strings derived from a per-server secret plus a monotonic -//! counter via SHA-256 — opaque to the client, not guessable -//! without the secret, and never collide within a process. +//! Both stores keep a full mirror of their state in a `Mutex<...>` +//! and persist on every write. Reads (the hot path for +//! `enforce_access`) never touch disk. +use std::collections::HashMap; +use std::path::{Path, PathBuf}; use std::sync::Mutex; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; use base64::Engine; use base64::engine::general_purpose::STANDARD as BASE64; +use rusqlite::Connection; use sha2::{Digest, Sha256}; -use crate::error::RegistryError; +use crate::config::{AuthConfig, MaxUsers}; +use crate::error::{RegistryError, Result}; -/// In-memory username → password map. Populated by the adduser -/// endpoint; passwords are stored in plaintext because tests -/// already have the plaintext on hand and there's no value in -/// hashing it for a process-local registry. -#[derive(Debug, Default)] +/// Bundle of the user store and the token store. Built once at +/// startup so the rest of the server doesn't have to know whether +/// auth is file-backed or in-memory. +#[derive(Debug)] +pub struct AuthState { + pub users: UserStore, + pub tokens: TokenStore, +} + +impl AuthState { + /// All-in-memory auth state. Used when neither + /// `auth.htpasswd.file` nor `auth.tokens.file` are configured, + /// and by tests that don't care about persistence. + pub fn in_memory() -> Self { + Self { users: UserStore::in_memory(), tokens: TokenStore::in_memory() } + } + + /// Build the auth state from an [`AuthConfig`]. Either store is + /// in-memory when its file path is unset; otherwise the on-disk + /// state is loaded eagerly so a malformed htpasswd or a + /// permission-denied SQLite file surfaces as a startup error. + pub fn load(config: &AuthConfig) -> Result { + let users = match config.htpasswd.file.clone() { + Some(path) => UserStore::open(path, config.htpasswd.max_users)?, + None => UserStore::in_memory(), + }; + let tokens = match config.tokens.file.clone() { + Some(path) => TokenStore::open(path)?, + None => TokenStore::in_memory(), + }; + Ok(Self { users, tokens }) + } +} + +/// Bcrypt cost factor used for new password hashes. Cost 10 is what +/// verdaccio uses by default and matches Apache `htpasswd -B`'s +/// default, so files written here verify cleanly against either +/// tool. ~50–100 ms per hash on modern hardware — slow enough to +/// frustrate offline cracking, cheap enough that adduser doesn't +/// feel sluggish. +const DEFAULT_BCRYPT_COST: u32 = 10; + +/// File-backed (or in-memory) htpasswd store. +#[derive(Debug)] pub struct UserStore { - users: Mutex>, + /// `username -> bcrypt hash`. The hash string carries its own + /// version and cost (`$2y$10$...`) so we never need to remember + /// per-record metadata. + users: Mutex>, + path: Option, + max_users: MaxUsers, + bcrypt_cost: u32, } impl UserStore { - pub fn new() -> Self { - Self::default() + /// In-memory store with no on-disk persistence. Used when + /// `auth.htpasswd.file` is unset and by the existing + /// `@pnpm/registry-mock` integration where every restart is a + /// fresh process. + pub fn in_memory() -> Self { + Self { + users: Mutex::new(HashMap::new()), + path: None, + max_users: MaxUsers::Unlimited, + bcrypt_cost: DEFAULT_BCRYPT_COST, + } } - /// Returns true if the user already existed and the password - /// matched (a "login" against an existing account), false if - /// the password didn't match. When the user doesn't exist, the - /// account is created with the supplied password and `Ok(false)` - /// (still "not a login") is returned — matches verdaccio's - /// adduser behavior where a brand-new user is registered on - /// the spot. - pub fn add_or_login( - &self, - username: &str, - password: &str, - ) -> Result { - let mut users = self.users.lock().expect("UserStore mutex poisoned"); - match users.get(username) { - Some(existing) => { - if constant_time_eq(existing.as_bytes(), password.as_bytes()) { + /// File-backed store. The file is parsed up front so a malformed + /// htpasswd surfaces as a startup error rather than a silent + /// empty user list. A missing file is OK — it's created on the + /// first registration. + pub fn open(path: PathBuf, max_users: MaxUsers) -> Result { + Self::open_with_cost(path, max_users, DEFAULT_BCRYPT_COST) + } + + /// Like [`Self::open`] but with a configurable bcrypt cost — used + /// by tests that want sub-100ms hashing. + pub fn open_with_cost(path: PathBuf, max_users: MaxUsers, bcrypt_cost: u32) -> Result { + let users = match std::fs::read_to_string(&path) { + Ok(raw) => parse_htpasswd(&raw).map_err(|reason| { + RegistryError::InvalidHtpasswdFile { path: path.display().to_string(), reason } + })?, + Err(err) if err.kind() == std::io::ErrorKind::NotFound => HashMap::new(), + Err(err) => return Err(err.into()), + }; + Ok(Self { users: Mutex::new(users), path: Some(path), max_users, bcrypt_cost }) + } + + /// Add a new user or verify a returning one. + /// + /// * Unknown username, registration allowed → bcrypt the password, + /// insert, persist, return `Created`. + /// * Known username, password matches → return `LoggedIn`. + /// * Known username, password wrong → `Unauthenticated`. + /// * Unknown username, registration disabled or capped → + /// `RegistrationDisabled` / `TooManyUsers`. + pub async fn add_or_login(&self, username: &str, password: &str) -> Result { + let existing_hash = { + let users = self.users.lock().expect("UserStore mutex poisoned"); + users.get(username).cloned() + }; + if let Some(stored) = existing_hash { + return verify_bcrypt(password.to_string(), stored).await.and_then(|ok| { + if ok { Ok(UpsertOutcome::LoggedIn) } else { Err(RegistryError::Unauthenticated { resource: format!("user {username:?}") }) } - } - None => { - users.insert(username.to_string(), password.to_string()); - Ok(UpsertOutcome::Created) - } + }); } + + // Brand-new user — check the registration cap before doing + // the (expensive) bcrypt hash. + match self.max_users { + MaxUsers::Disabled => return Err(RegistryError::RegistrationDisabled), + MaxUsers::Limited(max) => { + let current = self.users.lock().expect("UserStore mutex poisoned").len() as u64; + if current >= max { + return Err(RegistryError::TooManyUsers { max }); + } + } + MaxUsers::Unlimited => {} + } + + let hash = hash_bcrypt(password.to_string(), self.bcrypt_cost).await?; + let snapshot = { + let mut users = self.users.lock().expect("UserStore mutex poisoned"); + // Re-check the cap under the lock to make the limit hold + // under concurrent adduser bursts. A second writer that + // raced in while we were hashing could otherwise push + // past the cap. + if let MaxUsers::Limited(max) = self.max_users + && (users.len() as u64) >= max + && !users.contains_key(username) + { + return Err(RegistryError::TooManyUsers { max }); + } + users.insert(username.to_string(), hash); + serialize_htpasswd(&users) + }; + self.persist(snapshot).await?; + Ok(UpsertOutcome::Created) } /// Verify a username+password pair against the store. Returns /// `Some(username)` when the credentials match, `None` - /// otherwise — never errors, since the caller may want to - /// degrade to anonymous on failure rather than fail outright. + /// otherwise. Used by the Basic-auth path of [`identify`] — + /// kept synchronous so `enforce_access` can stay sync. + /// + /// Bcrypt verification runs inline on the caller's task; at + /// cost 10 it's ~50–100 ms, which is fine for the rare Basic + /// path. The hot path is Bearer, which doesn't bcrypt. pub fn verify(&self, username: &str, password: &str) -> Option { - let users = self.users.lock().expect("UserStore mutex poisoned"); - let stored = users.get(username)?; - constant_time_eq(stored.as_bytes(), password.as_bytes()).then(|| username.to_string()) + let stored = { + let users = self.users.lock().expect("UserStore mutex poisoned"); + users.get(username).cloned()? + }; + bcrypt::verify(password, &stored).ok()?.then(|| username.to_string()) } } @@ -88,72 +201,125 @@ pub enum UpsertOutcome { LoggedIn, } -/// In-memory token → username map. Tokens are minted on adduser -/// and on the basic-auth fallback for endpoints that need a -/// token in the response body. +impl UserStore { + async fn persist(&self, body: String) -> Result<()> { + let Some(path) = self.path.clone() else { + return Ok(()); + }; + tokio::task::spawn_blocking(move || write_atomic(&path, body.as_bytes())).await??; + Ok(()) + } +} + +/// SHA-256-hashed (token_hash → username) map, optionally backed by +/// a SQLite database for cross-restart durability. +/// +/// Token records carry the verdaccio shape (created_at, last_used_at, +/// readonly, cidr_whitelist) so they can be surfaced by future +/// `/-/npm/v1/tokens` endpoints without a schema migration. #[derive(Debug)] pub struct TokenStore { - tokens: Mutex>, + inner: Mutex, + persist: Option, secret: [u8; 32], counter: AtomicU64, } +#[derive(Debug)] +struct TokenInner { + /// hex-encoded SHA-256 of the raw token → record. + tokens: HashMap, +} + +#[derive(Debug, Clone)] +pub struct TokenRecord { + pub username: String, + pub created_at: u64, + pub last_used_at: u64, + pub readonly: bool, + pub cidr_whitelist: Vec, +} + impl TokenStore { - /// Build a store with a freshly-randomized secret. The secret - /// is derived from the system time + process id + a small - /// startup-only RNG fallback — good enough for a test server - /// that runs for a few seconds at a time, and lets us avoid - /// pulling in `rand` as a new workspace dependency. - pub fn new() -> Self { - let mut hasher = Sha256::new(); - let nanos = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|duration| duration.as_nanos()) - .unwrap_or(0); - hasher.update(nanos.to_le_bytes()); - hasher.update(std::process::id().to_le_bytes()); - // Mix in the address of a stack allocation to add a sliver - // of ASLR-derived entropy; not relied on for security, just - // for collision resistance across multiple test processes - // started within the same nanosecond. - let addr = &nanos as *const u128 as usize; - hasher.update(addr.to_le_bytes()); - let mut secret = [0u8; 32]; - secret.copy_from_slice(&hasher.finalize()); + /// Pure in-memory store. Tokens vanish on restart. + pub fn in_memory() -> Self { Self { - tokens: Mutex::new(std::collections::HashMap::new()), - secret, + inner: Mutex::new(TokenInner { tokens: HashMap::new() }), + persist: None, + secret: fresh_secret(), counter: AtomicU64::new(0), } } - /// Mint a fresh token for `username` and remember it. - pub fn issue(&self, username: &str) -> String { - let nonce = self.counter.fetch_add(1, Ordering::Relaxed); - let mut hasher = Sha256::new(); - hasher.update(self.secret); - hasher.update(nonce.to_le_bytes()); - hasher.update(username.as_bytes()); - let digest = hasher.finalize(); - // 16 bytes of hash → 32 hex chars. Long enough to be - // unguessable, short enough to keep test logs readable. - let token = hex_encode(&digest[..16]); - let mut tokens = self.tokens.lock().expect("TokenStore mutex poisoned"); - tokens.insert(token.clone(), username.to_string()); - token + /// SQLite-backed store. Creates the file (and the `tokens` + /// table) if missing; loads existing records into memory on + /// startup so the hot lookup path doesn't touch disk. + pub fn open(path: PathBuf) -> Result { + if let Some(parent) = path.parent() + && !parent.as_os_str().is_empty() + { + std::fs::create_dir_all(parent)?; + } + let conn = Connection::open(&path)?; + init_tokens_schema(&conn)?; + let tokens = load_all_tokens(&conn)?; + drop(conn); + Ok(Self { + inner: Mutex::new(TokenInner { tokens }), + persist: Some(path), + secret: fresh_secret(), + counter: AtomicU64::new(0), + }) } - /// Resolve a token back to its username, if it was issued by - /// this store. - pub fn lookup(&self, token: &str) -> Option { - let tokens = self.tokens.lock().expect("TokenStore mutex poisoned"); - tokens.get(token).cloned() + /// Mint a fresh token for `username`, persist its hash, and + /// return the raw token to the caller. The raw token is never + /// stored. + pub async fn issue(&self, username: &str) -> Result { + let nonce = self.counter.fetch_add(1, Ordering::Relaxed); + let raw = mint_token(&self.secret, nonce, username); + let token_hash = sha256_hex(raw.as_bytes()); + let record = TokenRecord { + username: username.to_string(), + created_at: unix_seconds(), + last_used_at: unix_seconds(), + readonly: false, + cidr_whitelist: Vec::new(), + }; + { + let mut inner = self.inner.lock().expect("TokenStore mutex poisoned"); + inner.tokens.insert(token_hash.clone(), record.clone()); + } + if let Some(path) = self.persist.clone() { + let hash_for_db = token_hash.clone(); + tokio::task::spawn_blocking(move || -> Result<()> { + let conn = Connection::open(&path)?; + upsert_token(&conn, &hash_for_db, &record)?; + Ok(()) + }) + .await??; + } + Ok(raw) + } + + /// Resolve a raw token back to its username, if it was ever + /// issued (and not since deleted). Runs entirely in memory. + pub fn lookup(&self, raw: &str) -> Option { + let token_hash = sha256_hex(raw.as_bytes()); + let inner = self.inner.lock().expect("TokenStore mutex poisoned"); + inner.tokens.get(&token_hash).map(|record| record.username.clone()) } } impl Default for TokenStore { fn default() -> Self { - Self::new() + Self::in_memory() + } +} + +impl Default for UserStore { + fn default() -> Self { + Self::in_memory() } } @@ -187,6 +353,222 @@ pub fn identify( None } +// --------------------------------------------------------------- +// htpasswd I/O +// --------------------------------------------------------------- + +/// Parse an Apache-shaped htpasswd file. Each non-empty, non-comment +/// line is `username:hash`; we accept any bcrypt variant (`$2a$`, +/// `$2b$`, `$2y$`) but reject everything else so a config file +/// holding `crypt(3)` or plaintext entries can't masquerade as +/// passing without the password actually being verifiable. +fn parse_htpasswd(raw: &str) -> std::result::Result, String> { + let mut out = HashMap::new(); + for (line_no, line) in raw.lines().enumerate() { + let line = line.trim_end_matches(['\r']); + if line.is_empty() || line.starts_with('#') { + continue; + } + let Some((user, hash)) = line.split_once(':') else { + return Err(format!("line {}: missing ':' separator", line_no + 1)); + }; + let user = user.trim(); + let hash = hash.trim(); + if user.is_empty() { + return Err(format!("line {}: empty username", line_no + 1)); + } + if !is_supported_hash(hash) { + return Err(format!( + "line {}: unsupported hash format for user {user:?} (only bcrypt is accepted)", + line_no + 1, + )); + } + out.insert(user.to_string(), hash.to_string()); + } + Ok(out) +} + +/// True for any bcrypt variant. We don't accept `{SHA}`, `$apr1$`, +/// crypt(3), or plaintext — every supported entry must go through +/// `bcrypt::verify` cleanly. +fn is_supported_hash(hash: &str) -> bool { + hash.starts_with("$2a$") || hash.starts_with("$2b$") || hash.starts_with("$2y$") +} + +/// Serialize the user map back to htpasswd shape. Sorted output so +/// the file is stable under `git diff` and easier to eyeball. +fn serialize_htpasswd(users: &HashMap) -> String { + let mut entries: Vec<(&String, &String)> = users.iter().collect(); + entries.sort_by(|left, right| left.0.cmp(right.0)); + let mut out = String::new(); + for (user, hash) in entries { + out.push_str(user); + out.push(':'); + out.push_str(hash); + out.push('\n'); + } + out +} + +fn write_atomic(path: &Path, bytes: &[u8]) -> Result<()> { + use std::io::Write as _; + + if let Some(parent) = path.parent() + && !parent.as_os_str().is_empty() + { + std::fs::create_dir_all(parent)?; + } + let tmp = unique_tmp_path(path); + { + let mut file = std::fs::File::create(&tmp)?; + file.write_all(bytes)?; + file.sync_all()?; + } + std::fs::rename(&tmp, path)?; + Ok(()) +} + +fn unique_tmp_path(base: &Path) -> PathBuf { + static COUNTER: AtomicU64 = AtomicU64::new(0); + let counter = COUNTER.fetch_add(1, Ordering::Relaxed); + let pid = std::process::id(); + let mut name = base.file_name().map(|n| n.to_os_string()).unwrap_or_default(); + name.push(format!(".tmp.{pid}.{counter}")); + match base.parent() { + Some(parent) => parent.join(name), + None => PathBuf::from(name), + } +} + +// --------------------------------------------------------------- +// bcrypt helpers +// --------------------------------------------------------------- + +/// Hash a password off the reactor — bcrypt at cost 10 takes +/// ~50–100 ms and stalls every other async task on the same thread +/// if run inline. +async fn hash_bcrypt(password: String, cost: u32) -> Result { + tokio::task::spawn_blocking(move || { + let parts = bcrypt::hash_with_result(&password, cost)?; + // Format as $2y$ for maximum cross-tool compatibility — + // Apache's `htpasswd -B` writes $2y$, GNU coreutils tools + // accept it, and bcrypt::verify reads any of $2a/$2b/$2y. + Ok(parts.format_for_version(bcrypt::Version::TwoY)) + }) + .await? +} + +async fn verify_bcrypt(password: String, hash: String) -> Result { + tokio::task::spawn_blocking(move || { + bcrypt::verify(&password, &hash).map_err(RegistryError::from) + }) + .await? +} + +// --------------------------------------------------------------- +// SQLite-backed token store +// --------------------------------------------------------------- + +fn init_tokens_schema(conn: &Connection) -> Result<()> { + conn.execute_batch( + "CREATE TABLE IF NOT EXISTS tokens ( + token_hash TEXT PRIMARY KEY, + username TEXT NOT NULL, + created_at INTEGER NOT NULL, + last_used_at INTEGER NOT NULL, + readonly INTEGER NOT NULL DEFAULT 0, + cidr_whitelist TEXT NOT NULL DEFAULT '[]' + ); + CREATE INDEX IF NOT EXISTS tokens_username ON tokens(username);", + )?; + Ok(()) +} + +fn load_all_tokens(conn: &Connection) -> Result> { + let mut stmt = conn.prepare( + "SELECT token_hash, username, created_at, last_used_at, readonly, cidr_whitelist + FROM tokens", + )?; + let mut rows = stmt.query([])?; + let mut out = HashMap::new(); + while let Some(row) = rows.next()? { + let hash: String = row.get(0)?; + let username: String = row.get(1)?; + let created_at: i64 = row.get(2)?; + let last_used_at: i64 = row.get(3)?; + let readonly: i64 = row.get(4)?; + let cidr_json: String = row.get(5)?; + let cidr_whitelist: Vec = serde_json::from_str(&cidr_json).unwrap_or_default(); + out.insert( + hash, + TokenRecord { + username, + created_at: created_at as u64, + last_used_at: last_used_at as u64, + readonly: readonly != 0, + cidr_whitelist, + }, + ); + } + Ok(out) +} + +fn upsert_token(conn: &Connection, token_hash: &str, record: &TokenRecord) -> Result<()> { + let cidr_json = serde_json::to_string(&record.cidr_whitelist) + .expect("Vec always serializes to JSON"); + conn.execute( + "INSERT INTO tokens (token_hash, username, created_at, last_used_at, readonly, cidr_whitelist) + VALUES (?1, ?2, ?3, ?4, ?5, ?6) + ON CONFLICT(token_hash) DO UPDATE SET + username = excluded.username, + last_used_at = excluded.last_used_at, + readonly = excluded.readonly, + cidr_whitelist = excluded.cidr_whitelist", + rusqlite::params![ + token_hash, + record.username, + record.created_at as i64, + record.last_used_at as i64, + record.readonly as i64, + cidr_json, + ], + )?; + Ok(()) +} + +// --------------------------------------------------------------- +// crypto helpers +// --------------------------------------------------------------- + +/// Build a freshly-randomized secret for [`TokenStore::issue`]. +/// Pulls 32 bytes from the OS CSPRNG (`getrandom` → `/dev/urandom` +/// on Linux, `BCryptGenRandom` on Windows, `getentropy` on macOS). +/// We refuse to start the server if the OS RNG is unavailable +/// rather than fall back to weaker entropy — token unguessability +/// is the whole reason this exists. +fn fresh_secret() -> [u8; 32] { + let mut secret = [0u8; 32]; + getrandom::fill(&mut secret).expect("OS CSPRNG must be available"); + secret +} + +fn mint_token(secret: &[u8; 32], nonce: u64, username: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(secret); + hasher.update(nonce.to_le_bytes()); + hasher.update(username.as_bytes()); + let digest = hasher.finalize(); + // 16 bytes of hash → 32 hex chars. Long enough to be + // unguessable, short enough to keep test logs readable. + hex_encode(&digest[..16]) +} + +fn sha256_hex(bytes: &[u8]) -> String { + let mut hasher = Sha256::new(); + hasher.update(bytes); + hex_encode(&hasher.finalize()) +} + fn hex_encode(bytes: &[u8]) -> String { let mut out = String::with_capacity(bytes.len() * 2); for byte in bytes { @@ -195,33 +577,36 @@ fn hex_encode(bytes: &[u8]) -> String { out } -/// Constant-time byte equality. Avoids early-exit timing leaks -/// between password comparisons. We don't import the `subtle` -/// crate just for this — a hand-rolled XOR loop is trivially -/// correct and our threat model is "test runner", not "live -/// internet". -fn constant_time_eq(left: &[u8], right: &[u8]) -> bool { - if left.len() != right.len() { - return false; - } - let mut diff: u8 = 0; - for (l, r) in left.iter().zip(right.iter()) { - diff |= l ^ r; - } - diff == 0 +fn unix_seconds() -> u64 { + SystemTime::now().duration_since(UNIX_EPOCH).map(|duration| duration.as_secs()).unwrap_or(0) } #[cfg(test)] mod tests { - use super::{TokenStore, UpsertOutcome, UserStore, identify}; + use super::{TokenStore, UpsertOutcome, UserStore, identify, parse_htpasswd}; + use crate::config::MaxUsers; - #[test] - fn adduser_creates_then_validates() { - let store = UserStore::new(); - let outcome = store.add_or_login("alice", "secret").unwrap(); + /// Tests run with cost 4 (the bcrypt crate's minimum sane value) + /// so per-test wall-clock stays in the single-digit ms range. + /// Production paths use [`DEFAULT_BCRYPT_COST`]. + const TEST_COST: u32 = 4; + + fn test_user_store() -> UserStore { + UserStore { + users: std::sync::Mutex::new(std::collections::HashMap::new()), + path: None, + max_users: MaxUsers::Unlimited, + bcrypt_cost: TEST_COST, + } + } + + #[tokio::test] + async fn adduser_creates_then_validates() { + let store = test_user_store(); + let outcome = store.add_or_login("alice", "secret").await.unwrap(); assert!(matches!(outcome, UpsertOutcome::Created)); - let outcome = store.add_or_login("alice", "secret").unwrap(); + let outcome = store.add_or_login("alice", "secret").await.unwrap(); assert!(matches!(outcome, UpsertOutcome::LoggedIn)); assert!(store.verify("alice", "secret").is_some()); @@ -229,40 +614,152 @@ mod tests { assert!(store.verify("bob", "secret").is_none()); } - #[test] - fn adduser_rejects_existing_user_with_wrong_password() { - let store = UserStore::new(); - store.add_or_login("alice", "secret").unwrap(); - let err = store.add_or_login("alice", "different").unwrap_err(); - // Maps to 401, matching what npm/verdaccio return for a - // bad password against an existing username. + #[tokio::test] + async fn adduser_rejects_existing_user_with_wrong_password() { + let store = test_user_store(); + store.add_or_login("alice", "secret").await.unwrap(); + let err = store.add_or_login("alice", "different").await.unwrap_err(); assert_eq!(err.status_code(), axum::http::StatusCode::UNAUTHORIZED); } + #[tokio::test] + async fn adduser_persists_across_reopen() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("htpasswd"); + + let store = + UserStore::open_with_cost(path.clone(), MaxUsers::Unlimited, TEST_COST).unwrap(); + store.add_or_login("alice", "secret").await.unwrap(); + drop(store); + + // Cold-load from disk; the hashed entry should still verify. + let reopened = + UserStore::open_with_cost(path.clone(), MaxUsers::Unlimited, TEST_COST).unwrap(); + let outcome = reopened.add_or_login("alice", "secret").await.unwrap(); + assert!(matches!(outcome, UpsertOutcome::LoggedIn)); + assert!(reopened.verify("alice", "secret").is_some()); + } + + #[tokio::test] + async fn adduser_writes_bcrypt_2y_format() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("htpasswd"); + let store = + UserStore::open_with_cost(path.clone(), MaxUsers::Unlimited, TEST_COST).unwrap(); + store.add_or_login("alice", "secret").await.unwrap(); + + let raw = std::fs::read_to_string(&path).unwrap(); + let (user, hash) = raw.trim_end().split_once(':').expect("user:hash line"); + assert_eq!(user, "alice"); + assert!(hash.starts_with("$2y$"), "expected $2y$ prefix for htpasswd compat, got {hash:?}"); + } + + #[tokio::test] + async fn max_users_minus_one_disables_registration() { + let store = UserStore { + users: std::sync::Mutex::new(std::collections::HashMap::new()), + path: None, + max_users: MaxUsers::Disabled, + bcrypt_cost: TEST_COST, + }; + let err = store.add_or_login("alice", "secret").await.unwrap_err(); + assert_eq!(err.status_code(), axum::http::StatusCode::FORBIDDEN); + } + + #[tokio::test] + async fn max_users_caps_new_registrations() { + let store = UserStore { + users: std::sync::Mutex::new(std::collections::HashMap::new()), + path: None, + max_users: MaxUsers::Limited(2), + bcrypt_cost: TEST_COST, + }; + store.add_or_login("alice", "x").await.unwrap(); + store.add_or_login("bob", "x").await.unwrap(); + let err = store.add_or_login("carol", "x").await.unwrap_err(); + assert_eq!(err.status_code(), axum::http::StatusCode::FORBIDDEN); + // Existing users may still log in once the cap is hit. + store.add_or_login("alice", "x").await.unwrap(); + } + + #[tokio::test] + async fn open_rejects_corrupt_htpasswd_at_startup() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("htpasswd"); + std::fs::write(&path, "no-colon-here\nalice:plaintext\n").unwrap(); + let err = UserStore::open(path, MaxUsers::Unlimited).unwrap_err(); + assert!( + matches!(err, crate::error::RegistryError::InvalidHtpasswdFile { .. }), + "got {err:?}", + ); + } + #[test] - fn tokens_round_trip() { - let tokens = TokenStore::new(); - let token = tokens.issue("alice"); + fn parse_htpasswd_accepts_blank_and_comment_lines() { + let raw = "\n# comment\nalice:$2y$10$abcdef\n"; + let map = parse_htpasswd(raw).unwrap(); + assert_eq!(map.len(), 1); + assert!(map.contains_key("alice")); + } + + #[tokio::test] + async fn tokens_round_trip() { + let tokens = TokenStore::in_memory(); + let token = tokens.issue("alice").await.unwrap(); assert_eq!(tokens.lookup(&token).as_deref(), Some("alice")); assert!(tokens.lookup("not-a-token").is_none()); } - #[test] - fn tokens_are_unique_per_issue() { - let tokens = TokenStore::new(); - let a = tokens.issue("alice"); - let b = tokens.issue("alice"); + #[tokio::test] + async fn tokens_are_unique_per_issue() { + let tokens = TokenStore::in_memory(); + let a = tokens.issue("alice").await.unwrap(); + let b = tokens.issue("alice").await.unwrap(); assert_ne!(a, b, "every call to issue() should mint a fresh token"); } - #[test] - fn identify_recognizes_bearer_and_basic() { - let users = UserStore::new(); - users.add_or_login("alice", "secret").unwrap(); - let tokens = TokenStore::new(); - let token = tokens.issue("alice"); + #[tokio::test] + async fn tokens_persist_across_reopen() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("tokens.db"); + + let store = TokenStore::open(path.clone()).unwrap(); + let raw = store.issue("alice").await.unwrap(); + drop(store); + + let reopened = TokenStore::open(path).unwrap(); + assert_eq!( + reopened.lookup(&raw).as_deref(), + Some("alice"), + "token issued before restart must still resolve after reload", + ); + } + + #[tokio::test] + async fn tokens_db_stores_hash_not_raw() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.path().join("tokens.db"); + let store = TokenStore::open(path.clone()).unwrap(); + let raw = store.issue("alice").await.unwrap(); + + // Open the SQLite file directly and confirm the raw token + // never appears in any row. + let conn = rusqlite::Connection::open(&path).unwrap(); + let mut stmt = conn.prepare("SELECT token_hash FROM tokens").unwrap(); + let mut rows = stmt.query([]).unwrap(); + let row = rows.next().unwrap().expect("at least one row"); + let stored: String = row.get(0).unwrap(); + assert_ne!(stored, raw, "raw token must not be persisted"); + assert_eq!(stored.len(), 64, "SHA-256 hex is 64 chars"); + } + + #[tokio::test] + async fn identify_recognizes_bearer_and_basic() { + let users = test_user_store(); + users.add_or_login("alice", "secret").await.unwrap(); + let tokens = TokenStore::in_memory(); + let token = tokens.issue("alice").await.unwrap(); - // Bearer let header = format!("Bearer {token}"); assert_eq!(identify(Some(&header), &users, &tokens).as_deref(), Some("alice")); @@ -270,29 +767,25 @@ mod tests { let basic = "Basic YWxpY2U6c2VjcmV0"; assert_eq!(identify(Some(basic), &users, &tokens).as_deref(), Some("alice")); - // Wrong password — None, not an error. let wrong = format!( "Basic {}", base64::Engine::encode(&base64::engine::general_purpose::STANDARD, b"alice:wrong"), ); assert!(identify(Some(&wrong), &users, &tokens).is_none()); - // Missing header assert!(identify(None, &users, &tokens).is_none()); - - // Garbage assert!(identify(Some("Bearer total-nonsense"), &users, &tokens).is_none()); } /// RFC 7235 §2.1: "the scheme is case-insensitive". All of /// `Bearer`, `BEARER`, and `bearer` (and the mixed-case forms /// some clients emit) must resolve the same way. - #[test] - fn identify_parses_auth_scheme_case_insensitively() { - let users = UserStore::new(); - users.add_or_login("alice", "secret").unwrap(); - let tokens = TokenStore::new(); - let token = tokens.issue("alice"); + #[tokio::test] + async fn identify_parses_auth_scheme_case_insensitively() { + let users = test_user_store(); + users.add_or_login("alice", "secret").await.unwrap(); + let tokens = TokenStore::in_memory(); + let token = tokens.issue("alice").await.unwrap(); for scheme in ["Bearer", "bearer", "BEARER", "BeArEr"] { let header = format!("{scheme} {token}"); diff --git a/registry/crates/pnpm-registry/src/config.rs b/registry/crates/pnpm-registry/src/config.rs index ed07acc5b8..0dd5161b55 100644 --- a/registry/crates/pnpm-registry/src/config.rs +++ b/registry/crates/pnpm-registry/src/config.rs @@ -55,6 +55,63 @@ pub struct Config { /// `@private/*` and `@pnpm.e2e/needs-auth` policies that /// `@pnpm/registry-mock` did under verdaccio. pub policies: PackagePolicies, + /// Where to read/write the htpasswd-format user file and the + /// token database. Both stores are in-memory when their paths + /// are `None`, matching the original `@pnpm/registry-mock` mode + /// where every restart wipes accounts. + pub auth: AuthConfig, +} + +/// Auth-related runtime configuration. Built from the YAML +/// `auth:` block plus runtime defaults. +#[derive(Debug, Default, Clone)] +pub struct AuthConfig { + pub htpasswd: HtpasswdConfig, + pub tokens: TokensConfig, +} + +/// Where the htpasswd users file lives and how many users may sign +/// up before registration is refused. +#[derive(Debug, Default, Clone)] +pub struct HtpasswdConfig { + /// Absolute path to the htpasswd file. `None` keeps user state + /// in memory (back-compat with `@pnpm/registry-mock`). + pub file: Option, + /// Cap on new user registrations. + pub max_users: MaxUsers, +} + +/// Where the token database lives. SQLite-backed when `file` is +/// set; an in-memory map otherwise. +#[derive(Debug, Default, Clone)] +pub struct TokensConfig { + pub file: Option, +} + +/// Three-state cap on `auth.htpasswd.max_users`: +/// +/// * absent → unlimited (verdaccio's `+infinity` default; the YAML +/// `+inf` token is a float literal and won't parse into the +/// `i64` field, so the only way to ask for "no cap" is to omit +/// the key) +/// * `-1` → registration disabled +/// * non-negative `n` → at most `n` users +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub enum MaxUsers { + #[default] + Unlimited, + Disabled, + Limited(u64), +} + +impl MaxUsers { + /// Translate the YAML value into [`MaxUsers`]. Verdaccio accepts + /// any signed integer here; negative anything other than `-1` is + /// nonsense and is treated as "disabled" to err on the side of + /// rejecting unsafe configs. + fn from_yaml(value: i64) -> Self { + if value < 0 { MaxUsers::Disabled } else { MaxUsers::Limited(value as u64) } + } } /// Verdaccio-shaped uplink declaration. Only `url` is honored — @@ -90,6 +147,32 @@ struct ConfigFile { uplinks: IndexMap, #[serde(default)] packages: IndexMap, + #[serde(default)] + auth: AuthFile, +} + +#[derive(Debug, Default, Deserialize)] +struct AuthFile { + #[serde(default)] + htpasswd: HtpasswdFile, + #[serde(default)] + tokens: TokensFile, +} + +#[derive(Debug, Default, Deserialize)] +struct HtpasswdFile { + #[serde(default)] + file: Option, + /// `i64` so the verdaccio sentinel `-1` (registration disabled) + /// parses; anything `≥ 0` becomes a hard cap. + #[serde(default)] + max_users: Option, +} + +#[derive(Debug, Default, Deserialize)] +struct TokensFile { + #[serde(default)] + file: Option, } impl Config { @@ -121,6 +204,7 @@ impl Config { packages, packument_ttl: Self::DEFAULT_PACKUMENT_TTL, policies: PackagePolicies::registry_mock_defaults(), + auth: AuthConfig::default(), } } @@ -135,6 +219,7 @@ impl Config { packages: IndexMap::new(), packument_ttl: Self::DEFAULT_PACKUMENT_TTL, policies: PackagePolicies::registry_mock_defaults(), + auth: AuthConfig::default(), } } @@ -188,6 +273,7 @@ impl Config { let file: ConfigFile = serde_saphyr::from_str(raw)?; let storage = resolve_relative(&file.storage, base_dir); let public_url = public_url.unwrap_or_else(|| format!("http://{listen}")); + let auth = build_auth_config(&file.auth, base_dir); Ok(Self { listen, public_url, @@ -201,6 +287,7 @@ impl Config { // policy wiring is out of scope for this rebase. Keep the // hard-coded defaults — same as `proxy` / `static_serve`. policies: PackagePolicies::registry_mock_defaults(), + auth, }) } @@ -220,6 +307,38 @@ impl Config { } } +/// Build the runtime [`AuthConfig`] from the YAML `auth:` block. +/// Relative paths are resolved against `base_dir` so a path like +/// `./htpasswd` lives next to the config file (verdaccio's +/// convention). When `auth.htpasswd.file` is set but +/// `auth.tokens.file` is not, tokens default to a `tokens.db` +/// sibling of the htpasswd file — keeping credentials co-located in +/// one directory the operator can lock down (`chmod 600`). +fn build_auth_config(file: &AuthFile, base_dir: &Path) -> AuthConfig { + let htpasswd_file = file.htpasswd.file.as_deref().map(|raw| resolve_relative(raw, base_dir)); + let tokens_file = file + .tokens + .file + .as_deref() + .map(|raw| resolve_relative(raw, base_dir)) + .or_else(|| htpasswd_file.as_deref().map(default_tokens_path_sibling_of)); + AuthConfig { + htpasswd: HtpasswdConfig { + file: htpasswd_file, + max_users: file.htpasswd.max_users.map_or(MaxUsers::Unlimited, MaxUsers::from_yaml), + }, + tokens: TokensConfig { file: tokens_file }, + } +} + +/// `tokens.db` next to the htpasswd file. The sibling layout lets an +/// operator lock the auth directory down with a single chmod and +/// stops the tokens file from leaking into a `storage` directory +/// that may be served over HTTP through an unrelated misconfig. +fn default_tokens_path_sibling_of(htpasswd: &Path) -> PathBuf { + htpasswd.parent().unwrap_or_else(|| Path::new(".")).join("tokens.db") +} + /// Resolve a (possibly relative) storage path against `base_dir`. /// Verdaccio's `./storage` convention. fn resolve_relative(raw: &str, base_dir: &Path) -> PathBuf { @@ -471,4 +590,78 @@ packages: let err = Config::from_yaml(Path::new("/no/such/file.yml"), listen(), None).unwrap_err(); assert_eq!(err.kind(), std::io::ErrorKind::NotFound); } + + #[test] + fn auth_block_resolves_htpasswd_relative_to_config_dir() { + let yaml = "\ +storage: ./s +auth: + htpasswd: + file: ./htpasswd +uplinks: {} +packages: {} +"; + let config = Config::from_yaml_str(yaml, Path::new("/etc/pnpr"), listen(), None).unwrap(); + assert_eq!(config.auth.htpasswd.file.as_deref(), Some(Path::new("/etc/pnpr/./htpasswd"))); + // Tokens default to the htpasswd sibling. + assert_eq!(config.auth.tokens.file.as_deref(), Some(Path::new("/etc/pnpr/tokens.db"))); + } + + #[test] + fn auth_block_absent_keeps_in_memory_defaults() { + let yaml = "storage: ./s\nuplinks: {}\npackages: {}\n"; + let config = Config::from_yaml_str(yaml, Path::new("/x"), listen(), None).unwrap(); + assert!(config.auth.htpasswd.file.is_none()); + assert!(config.auth.tokens.file.is_none()); + assert_eq!(config.auth.htpasswd.max_users, super::MaxUsers::Unlimited); + } + + #[test] + fn auth_tokens_file_explicit_override_wins_over_sibling_default() { + let yaml = "\ +storage: ./s +auth: + htpasswd: + file: ./htpasswd + tokens: + file: /var/lib/pnpr/tokens.sqlite +uplinks: {} +packages: {} +"; + let config = Config::from_yaml_str(yaml, Path::new("/etc/pnpr"), listen(), None).unwrap(); + assert_eq!( + config.auth.tokens.file.as_deref(), + Some(Path::new("/var/lib/pnpr/tokens.sqlite")), + ); + } + + #[test] + fn auth_max_users_negative_one_means_disabled() { + let yaml = "\ +storage: ./s +auth: + htpasswd: + file: ./htpasswd + max_users: -1 +uplinks: {} +packages: {} +"; + let config = Config::from_yaml_str(yaml, Path::new("/x"), listen(), None).unwrap(); + assert_eq!(config.auth.htpasswd.max_users, super::MaxUsers::Disabled); + } + + #[test] + fn auth_max_users_positive_is_a_hard_cap() { + let yaml = "\ +storage: ./s +auth: + htpasswd: + file: ./htpasswd + max_users: 5 +uplinks: {} +packages: {} +"; + let config = Config::from_yaml_str(yaml, Path::new("/x"), listen(), None).unwrap(); + assert_eq!(config.auth.htpasswd.max_users, super::MaxUsers::Limited(5)); + } } diff --git a/registry/crates/pnpm-registry/src/error.rs b/registry/crates/pnpm-registry/src/error.rs index 9f5c2f3c70..d5bbe7f8cc 100644 --- a/registry/crates/pnpm-registry/src/error.rs +++ b/registry/crates/pnpm-registry/src/error.rs @@ -87,6 +87,43 @@ pub enum RegistryError { reason: String, }, + /// `auth.htpasswd.max_users: -1` blocks new registrations. + /// Returned for adduser on a username that doesn't already + /// exist; existing-user logins are unaffected. + #[display("New user registration is disabled by auth.htpasswd.max_users: -1")] + RegistrationDisabled, + + /// `auth.htpasswd.max_users: N` cap reached. Returned for + /// adduser on a username that doesn't already exist. + #[display("Maximum number of users ({max}) reached")] + #[from(skip)] + TooManyUsers { max: u64 }, + + /// The htpasswd file on disk couldn't be parsed at startup. + /// Surfaced as a startup-time error rather than a silent empty + /// store so a corrupted file can't quietly lock every existing + /// user out. + #[display("Invalid htpasswd file {path}: {reason}")] + #[from(skip)] + InvalidHtpasswdFile { + #[error(not(source))] + path: String, + reason: String, + }, + + /// Bcrypt hash/verify failure. Operational error, not user-facing. + #[display("Bcrypt failure: {_0}")] + Bcrypt(bcrypt::BcryptError), + + /// SQLite-backed token store failure. + #[display("Token database error: {_0}")] + Sqlite(rusqlite::Error), + + /// A blocking task spawned for bcrypt or SQLite work panicked + /// or was cancelled. Treat as an internal server error. + #[display("Background task failed: {_0}")] + JoinError(tokio::task::JoinError), + #[display("I/O error: {_0}")] Io(std::io::Error), @@ -129,6 +166,13 @@ impl RegistryError { | RegistryError::BadRequest { .. } => StatusCode::BAD_REQUEST, RegistryError::Unauthenticated { .. } => StatusCode::UNAUTHORIZED, RegistryError::Forbidden { .. } => StatusCode::FORBIDDEN, + RegistryError::RegistrationDisabled | RegistryError::TooManyUsers { .. } => { + StatusCode::FORBIDDEN + } + RegistryError::InvalidHtpasswdFile { .. } + | RegistryError::Bcrypt(_) + | RegistryError::Sqlite(_) + | RegistryError::JoinError(_) => StatusCode::INTERNAL_SERVER_ERROR, RegistryError::Io(_) | RegistryError::Json(_) => StatusCode::BAD_GATEWAY, } } diff --git a/registry/crates/pnpm-registry/src/lib.rs b/registry/crates/pnpm-registry/src/lib.rs index 0a99389401..70b3f16a1a 100644 --- a/registry/crates/pnpm-registry/src/lib.rs +++ b/registry/crates/pnpm-registry/src/lib.rs @@ -19,7 +19,11 @@ mod server; mod streaming; mod upstream; -pub use config::{Config, DEFAULT_CONFIG_YAML, PackageAccess, UplinkConfig}; +pub use auth::{AuthState, TokenStore, UserStore, identify}; +pub use config::{ + AuthConfig, Config, DEFAULT_CONFIG_YAML, HtpasswdConfig, MaxUsers, PackageAccess, TokensConfig, + UplinkConfig, +}; pub use error::{RegistryError, Result}; pub use policy::{AccessRule, PackagePolicies, PackagePolicy}; -pub use server::{router, serve}; +pub use server::{router, router_with_auth, serve}; diff --git a/registry/crates/pnpm-registry/src/server.rs b/registry/crates/pnpm-registry/src/server.rs index ef1f12ff4f..e5fba20b72 100644 --- a/registry/crates/pnpm-registry/src/server.rs +++ b/registry/crates/pnpm-registry/src/server.rs @@ -10,7 +10,7 @@ use indexmap::IndexMap; use serde_json::{Value, json}; use tower_http::trace::TraceLayer; -use crate::auth::{TokenStore, UpsertOutcome, UserStore, identify}; +use crate::auth::{AuthState, UpsertOutcome, identify}; use crate::cache::Cache; use crate::config::Config; use crate::error::RegistryError; @@ -49,12 +49,14 @@ struct AppInner { /// time so each request avoids re-allocating a `ThrottledClient`. upstreams: IndexMap, config: Config, - users: UserStore, - tokens: TokenStore, + auth: AuthState, } -/// Build the axum [`Router`] for the registry. Exposed for tests and -/// for callers that want to drive the app without binding a TCP socket. +/// Build the axum [`Router`] with in-memory auth state. Convenient +/// for tests and for callers that don't want disk-backed users — +/// [`serve`] is the production entry point and goes through +/// [`router_with_auth`] with an [`AuthState::load`]-ed bundle so a +/// corrupted htpasswd file surfaces as a startup error. /// /// The 2- and 3-segment routes do dispatch inside the handler rather /// than registering overlapping parametric routes — matchit can't @@ -62,21 +64,20 @@ struct AppInner { /// router level, so we take both via one handler that branches on /// the `@` prefix and the literal-`-` segment. pub fn router(config: Config) -> Router { + router_with_auth(config, AuthState::in_memory()) +} + +/// Like [`router`] but with a caller-supplied [`AuthState`]. Used +/// by [`serve`] to wire the persistent file-backed stores, and by +/// tests that want to override the bcrypt cost or pre-seed users. +pub fn router_with_auth(config: Config, auth: AuthState) -> Router { let cache = Cache::new(config.storage.clone()); let upstreams: IndexMap = config .uplinks .iter() .map(|(name, uplink)| (name.clone(), Upstream::new(uplink.url.clone()))) .collect(); - let state = AppState { - inner: Arc::new(AppInner { - cache, - upstreams, - config, - users: UserStore::new(), - tokens: TokenStore::new(), - }), - }; + let state = AppState { inner: Arc::new(AppInner { cache, upstreams, config, auth }) }; Router::new() .route("/{name}", get(get_packument_unscoped).put(put_one_segment)) .route("/{first}/{second}", get(get_two_segments).put(put_two_segments)) @@ -97,10 +98,14 @@ pub fn router(config: Config) -> Router { .with_state(state) } -/// Bind to `config.listen` and serve forever. +/// Bind to `config.listen` and serve forever. Loads the configured +/// htpasswd users and token database before binding the socket so +/// a startup-time auth error surfaces before we accept any client +/// connections. pub async fn serve(config: Config) -> crate::error::Result<()> { + let auth = AuthState::load(&config.auth)?; let listen = config.listen; - let app = router(config); + let app = router_with_auth(config, auth); let listener = NodelayTcpListener(tokio::net::TcpListener::bind(listen).await?); tracing::info!(%listen, "pnpm-registry listening"); axum::serve(listener, app).with_graceful_shutdown(shutdown_signal()).await?; @@ -499,11 +504,14 @@ async fn add_user(state: &AppState, name: &str, body: &[u8]) -> Response { } }; - let outcome = match state.inner.users.add_or_login(name, password) { + let outcome = match state.inner.auth.users.add_or_login(name, password).await { Ok(o) => o, Err(err) => return error_response(&err), }; - let token = state.inner.tokens.issue(name); + let token = match state.inner.auth.tokens.issue(name).await { + Ok(t) => t, + Err(err) => return error_response(&err), + }; let ok_msg = match outcome { UpsertOutcome::Created => format!("user '{name}' created"), UpsertOutcome::LoggedIn => format!("you are authenticated as '{name}'"), @@ -1038,8 +1046,8 @@ fn enforce_access( }; let authenticated = identify( headers.get(header::AUTHORIZATION).and_then(|value| value.to_str().ok()), - &state.inner.users, - &state.inner.tokens, + &state.inner.auth.users, + &state.inner.auth.tokens, ); match (rule, authenticated, action) { (AccessRule::All, _, Action::Access) => Ok(()), diff --git a/registry/crates/pnpm-registry/tests/auth_persistence.rs b/registry/crates/pnpm-registry/tests/auth_persistence.rs new file mode 100644 index 0000000000..c9ff6fcdca --- /dev/null +++ b/registry/crates/pnpm-registry/tests/auth_persistence.rs @@ -0,0 +1,230 @@ +//! Acceptance tests for issue #11974 — pnpr must keep user +//! accounts and bearer tokens across process restarts so an +//! operator can run it as a hosted registry without losing every +//! account on the next container redeploy. + +use std::net::{Ipv4Addr, SocketAddr, SocketAddrV4}; +use std::path::PathBuf; +use std::process::Command; + +use axum::body::{Body, to_bytes}; +use axum::http::{Request, StatusCode}; +use serde_json::{Value, json}; +use tempfile::TempDir; +use tower::ServiceExt; + +use pnpm_registry::{ + AuthConfig, AuthState, Config, HtpasswdConfig, MaxUsers, TokensConfig, router_with_auth, +}; + +fn listen() -> SocketAddr { + SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::LOCALHOST, 4873)) +} + +fn persistent_config(storage: PathBuf, htpasswd: PathBuf, tokens_db: PathBuf) -> Config { + let mut config = Config::static_serve(listen(), storage); + config.public_url = "http://example.test".to_string(); + config.auth = AuthConfig { + htpasswd: HtpasswdConfig { file: Some(htpasswd), max_users: MaxUsers::Unlimited }, + tokens: TokensConfig { file: Some(tokens_db) }, + }; + config +} + +async fn body_bytes(body: Body) -> Vec { + to_bytes(body, usize::MAX).await.expect("read body").to_vec() +} + +async fn body_json(body: Body) -> Value { + serde_json::from_slice(&body_bytes(body).await).expect("body parses as JSON") +} + +fn put_json(path: &str, body: Value) -> Request { + Request::put(path) + .header("content-type", "application/json") + .body(Body::from(serde_json::to_vec(&body).unwrap())) + .unwrap() +} + +fn adduser_body(username: &str, password: &str) -> Value { + json!({ + "_id": format!("org.couchdb.user:{username}"), + "name": username, + "password": password, + "email": "foo@bar.net", + "type": "user", + "roles": [], + }) +} + +/// Boot pnpr → adduser → restart pnpr with the same storage dir → +/// existing token still authenticates, existing username can log +/// back in with the same password. +#[tokio::test] +async fn user_and_token_survive_restart() { + let auth_dir = TempDir::new().unwrap(); + let storage = TempDir::new().unwrap(); + let htpasswd = auth_dir.path().join("htpasswd"); + let tokens_db = auth_dir.path().join("tokens.db"); + + let config = + persistent_config(storage.path().to_path_buf(), htpasswd.clone(), tokens_db.clone()); + let auth = AuthState::load(&config.auth).expect("first boot"); + let app = router_with_auth(config.clone(), auth); + + // adduser pulls a fresh token out of the response body. + let response = app + .clone() + .oneshot(put_json("/-/user/org.couchdb.user:alice", adduser_body("alice", "secret"))) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::CREATED); + let payload = body_json(response.into_body()).await; + let token = payload["token"].as_str().expect("token in response").to_string(); + assert!(!token.is_empty()); + + // Both files should now exist on disk. + assert!(htpasswd.exists(), "htpasswd should be created on first registration"); + assert!(tokens_db.exists(), "tokens.db should be created on first token issue"); + + // Simulate a restart: drop the router (and the in-memory map), + // re-load from disk, rebuild the router. Same config, same paths. + drop(app); + let auth = AuthState::load(&config.auth).expect("reload after restart"); + let app = router_with_auth(config, auth); + + // The token issued before the "restart" must still resolve to + // alice — proves token hash round-tripped through SQLite. + let response = app + .clone() + .oneshot( + Request::put("/-/package/anything/dist-tags/latest") + .header("content-type", "application/json") + .header("Authorization", format!("Bearer {token}")) + .body(Body::from(serde_json::to_string("1.0.0").unwrap())) + .unwrap(), + ) + .await + .unwrap(); + // 404 from the dist-tag handler is fine — the point is we got + // past the 401 gate, which proves the token still authenticates. + assert_ne!(response.status(), StatusCode::UNAUTHORIZED, "token should still authenticate"); + + // The existing username must accept the same password and not + // be re-registered as a brand-new user. + let response = app + .oneshot(put_json("/-/user/org.couchdb.user:alice", adduser_body("alice", "secret"))) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::CREATED); + let payload = body_json(response.into_body()).await; + let ok_msg = payload["ok"].as_str().unwrap_or(""); + assert!( + ok_msg.contains("authenticated"), + "second adduser should be a login, not a registration; got ok={ok_msg:?}", + ); +} + +/// Corrupt the htpasswd file → server returns a parse diagnostic on +/// startup, not a silent empty user list. The acceptance criterion +/// is that the operator sees an error rather than the server happily +/// booting up with every existing account effectively erased. +#[tokio::test] +async fn corrupt_htpasswd_fails_startup_with_diagnostic() { + let auth_dir = TempDir::new().unwrap(); + let htpasswd = auth_dir.path().join("htpasswd"); + std::fs::write(&htpasswd, "this-line-has-no-colon-and-is-not-a-comment\n").unwrap(); + + let config = persistent_config( + TempDir::new().unwrap().path().to_path_buf(), + htpasswd.clone(), + auth_dir.path().join("tokens.db"), + ); + let err = AuthState::load(&config.auth).expect_err("malformed htpasswd should fail to load"); + let message = err.to_string(); + assert!( + message.contains("htpasswd") && message.contains(&htpasswd.display().to_string()), + "error must name the htpasswd file and explain what went wrong; got {message:?}", + ); +} + +/// htpasswd file produced by pnpr is readable by Apache's `htpasswd +/// -v` — cross-tool compatibility. Skipped silently when the host +/// doesn't have htpasswd installed (some CI images don't). +#[tokio::test] +async fn htpasswd_file_is_verifiable_by_apache_htpasswd_tool() { + if Command::new("htpasswd").arg("-h").output().is_err() { + eprintln!("apache htpasswd not on PATH — skipping cross-tool compat test"); + return; + } + + let auth_dir = TempDir::new().unwrap(); + let storage = TempDir::new().unwrap(); + let htpasswd = auth_dir.path().join("htpasswd"); + + let config = persistent_config( + storage.path().to_path_buf(), + htpasswd.clone(), + auth_dir.path().join("tokens.db"), + ); + let auth = AuthState::load(&config.auth).expect("first boot"); + let app = router_with_auth(config, auth); + + let response = app + .oneshot(put_json("/-/user/org.couchdb.user:alice", adduser_body("alice", "compat-secret"))) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::CREATED); + + // `htpasswd -v` (verify-only) exits 0 when the password matches. + let status = Command::new("htpasswd") + .args(["-v", "-b"]) + .arg(&htpasswd) + .arg("alice") + .arg("compat-secret") + .status() + .expect("spawn htpasswd"); + assert!( + status.success(), + "apache htpasswd should verify the pnpr-written hash; exit={status:?}", + ); + + // And a wrong password should be rejected by the same tool. + let status = Command::new("htpasswd") + .args(["-v", "-b"]) + .arg(&htpasswd) + .arg("alice") + .arg("wrong-password") + .status() + .expect("spawn htpasswd"); + assert!( + !status.success(), + "apache htpasswd should reject a wrong password against the pnpr-written hash", + ); +} + +/// `auth.htpasswd.max_users: -1` blocks new registrations — but +/// existing users can still log in. Wired end-to-end through the +/// adduser HTTP endpoint to confirm the policy surfaces as a 403, +/// not a silent 201. +#[tokio::test] +async fn max_users_minus_one_disables_registration_end_to_end() { + let auth_dir = TempDir::new().unwrap(); + let storage = TempDir::new().unwrap(); + let mut config = Config::static_serve(listen(), storage.path().to_path_buf()); + config.auth = AuthConfig { + htpasswd: HtpasswdConfig { + file: Some(auth_dir.path().join("htpasswd")), + max_users: MaxUsers::Disabled, + }, + tokens: TokensConfig { file: Some(auth_dir.path().join("tokens.db")) }, + }; + let auth = AuthState::load(&config.auth).unwrap(); + let app = router_with_auth(config, auth); + + let response = app + .oneshot(put_json("/-/user/org.couchdb.user:newbie", adduser_body("newbie", "anything"))) + .await + .unwrap(); + assert_eq!(response.status(), StatusCode::FORBIDDEN); +}