From 3ddd83000e379de931fd4db040030bc3d111bd5d Mon Sep 17 00:00:00 2001
From: Jamie Pine
Date: Fri, 17 Mar 2023 20:11:08 -0700
Subject: [PATCH] [Refactor] CAS id generation code simplify

Credit: ChatGPT 4

"In this simplified version, the code calculates the sample interval based on the file size and then uses a single loop to handle both the small and large file cases. The last sample is always taken from the end of the file."
---
 core/src/object/cas.rs | 33 ++++++++++++++-------------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/core/src/object/cas.rs b/core/src/object/cas.rs
index 77cd31c2d..c48423dce 100644
--- a/core/src/object/cas.rs
+++ b/core/src/object/cas.rs
@@ -18,32 +18,27 @@ async fn read_at(file: &mut File, offset: u64, size: u64) -> Result<Vec<u8>, io:
 }
 
 pub async fn generate_cas_id(path: impl AsRef<Path>, size: u64) -> Result<String, io::Error> {
-	// open file reference
 	let mut file = File::open(path).await?;
-
 	let mut hasher = Hasher::new();
-
-	// include the file size in the checksum
 	hasher.update(&size.to_le_bytes());
 
-	// if size is small enough, just read the whole thing
-
-	if SAMPLE_COUNT * SAMPLE_SIZE > size {
-		let buf = read_at(&mut file, 0, size).await?;
-		hasher.update(&buf);
+	let sample_interval = if SAMPLE_COUNT * SAMPLE_SIZE > size {
+		size
 	} else {
-		// loop over samples
-		for i in 0..SAMPLE_COUNT {
-			let buf = read_at(&mut file, (size / SAMPLE_COUNT) * i, SAMPLE_SIZE).await?;
-			hasher.update(&buf);
-		}
-		// sample end of file
-		let buf = read_at(&mut file, size - SAMPLE_SIZE, SAMPLE_SIZE).await?;
+		size / SAMPLE_COUNT
+	};
+
+	for i in 0..=SAMPLE_COUNT {
+		let offset = if i == SAMPLE_COUNT {
+			size - SAMPLE_SIZE
+		} else {
+			sample_interval * i
+		};
+		let buf = read_at(&mut file, offset, SAMPLE_SIZE).await?;
 		hasher.update(&buf);
 	}
 
-	let hex = hasher.finalize().to_hex();
-	let mut id = hex.to_string();
+	let mut id = hasher.finalize().to_hex();
 	id.truncate(16);
-	Ok(id)
+	Ok(id.to_string())
 }