[Refactor] Simplify CAS id generation code

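Credit: ChatGPT 4 "In this simplified version, the code calculates the sample interval based on the file size and then uses a single loop to handle both the small and large file cases. The last sample is always taken from the end of the file."

Review note: this change is not behavior-preserving for small files. When `SAMPLE_COUNT * SAMPLE_SIZE > size`, the previous code hashed the whole file with a single `read_at(&mut file, 0, size)`; the new loop instead issues `SAMPLE_COUNT + 1` reads of `SAMPLE_SIZE` bytes each, at offsets `size * i` (at or past `size` for every `i >= 1`) and finally at `size - SAMPLE_SIZE`, a `u64` subtraction that underflows when `size < SAMPLE_SIZE`.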
2026-05-24 16:32:45 -04:00 · 2023-03-17 20:11:08 -07:00
parent 3b8c866cbc
commit 3ddd83000e
1 changed files with 14 additions and 19 deletions
--- a/core/src/object/cas.rs
+++ b/core/src/object/cas.rs
@@ -18,32 +18,27 @@ async fn read_at(file: &mut File, offset: u64, size: u64) -> Result<Vec<u8>, io:
 }

 pub async fn generate_cas_id(path: impl AsRef<Path>, size: u64) -> Result<String, io::Error> {
-	// open file reference
 	let mut file = File::open(path).await?;
-
 	let mut hasher = Hasher::new();
-
-	// include the file size in the checksum
 	hasher.update(&size.to_le_bytes());

-	// if size is small enough, just read the whole thing
-
-	if SAMPLE_COUNT * SAMPLE_SIZE > size {
-		let buf = read_at(&mut file, 0, size).await?;
-		hasher.update(&buf);
+	let sample_interval = if SAMPLE_COUNT * SAMPLE_SIZE > size {
+		size
 	} else {
-		// loop over samples
-		for i in 0..SAMPLE_COUNT {
-			let buf = read_at(&mut file, (size / SAMPLE_COUNT) * i, SAMPLE_SIZE).await?;
-			hasher.update(&buf);
-		}
-		// sample end of file
-		let buf = read_at(&mut file, size - SAMPLE_SIZE, SAMPLE_SIZE).await?;
+		size / SAMPLE_COUNT
+	};
+
+	for i in 0..=SAMPLE_COUNT {
+		let offset = if i == SAMPLE_COUNT {
+			size - SAMPLE_SIZE
+		} else {
+			sample_interval * i
+		};
+		let buf = read_at(&mut file, offset, SAMPLE_SIZE).await?;
 		hasher.update(&buf);
 	}

-	let hex = hasher.finalize().to_hex();
-	let mut id = hex.to_string();
+	let mut id = hasher.finalize().to_hex();
 	id.truncate(16);
-	Ok(id)
+	Ok(id.to_string())
 }