From 3f9c1bb56f1655f5748908e4112f4f332bd24499 Mon Sep 17 00:00:00 2001 From: Zoltan Kochan Date: Fri, 22 May 2026 01:55:31 +0200 Subject: [PATCH] perf(pacquet/tarball): skip per-snapshot deep clone on warm-cache prefetched hits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `run_with_mem_cache` was delegating to `run_without_mem_cache`, which ran the prefetched-cas-paths check and returned the hit via `(**cas_paths).clone()` — a deep clone of the inner `HashMap`. `run_with_mem_cache` then wrapped the result in `Arc::new(...)`. On a warm install with 1k+ snapshots that is one full per-file map allocation per snapshot, plus an Arc allocation, when the prefetched `Arc>` is already on hand. Lift the prefetched-cas-paths check into `run_with_mem_cache` so a hit returns `Arc::clone(prefetched_entry)` straight through and populates `mem_cache` with the same Arc — no per-entry walk, no fresh `Arc::new`. The inner check inside `run_without_mem_cache` stays for the direct callers (binary / zip archive paths in `install_package_by_snapshot`) that still want an owned `HashMap` they can mutate (e.g. to splice in the synthesized `package.json` for runtime archives). --- pacquet/crates/tarball/src/lib.rs | 44 ++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/pacquet/crates/tarball/src/lib.rs b/pacquet/crates/tarball/src/lib.rs index c1fbf7e4a4..7806b3f329 100644 --- a/pacquet/crates/tarball/src/lib.rs +++ b/pacquet/crates/tarball/src/lib.rs @@ -1779,7 +1779,49 @@ impl<'a> DownloadTarballToStore<'a> { self, mem_cache: &'a MemCache, ) -> Result>, TarballError> { - let &DownloadTarballToStore { package_url, .. } = &self; + let &DownloadTarballToStore { + package_url, + package_id, + package_integrity, + prefetched_cas_paths, + requester, + .. + } = &self; + + // Warm-cache fast path: when [`prefetch_cas_paths`] already + // batched the `(integrity, pkg_id)` row in at install start, + // return the `Arc` straight through instead of + // calling [`Self::run_without_mem_cache`] (which clones the + // inner per-file map by value before wrapping it in a fresh + // `Arc`). On warm installs every snapshot lands here; the + // deep clone the previous path was paying — entire + // `HashMap` per snapshot, where each entry + // is a `String` + `PathBuf` allocation — adds up to dominant + // memory traffic by 1k+ snapshots. + // + // Also stash the `Arc` into `mem_cache` keyed by URL so a + // second fetch of the same tarball (e.g. peer-resolved + // variants of the same package) hits the in-memory cache + // without re-checking the prefetched map. Matches what the + // normal path does with the result of + // [`Self::run_without_mem_cache`]. + if let Some(prefetched) = prefetched_cas_paths { + let cache_key = store_index_key(&package_integrity.to_string(), package_id); + if let Some(cas_paths) = prefetched.get(&cache_key) { + tracing::info!( + target: "pacquet::download", + ?package_url, + ?package_id, + "Reusing prefetched CAFS entry — skipping download (warm-cache fast path)", + ); + emit_progress_found_in_store::(package_id, requester); + let cas_paths = Arc::clone(cas_paths); + let cache_lock = + Arc::new(RwLock::new(CacheValue::Available(Arc::clone(&cas_paths)))); + mem_cache.insert(package_url.to_string(), cache_lock); + return Ok(cas_paths); + } + } // QUESTION: I see no copying from existing store_dir, is there such mechanism? // TODO: If it's not implemented yet, implement it