[Refactor] Simplify CAS id generation code

Credit: ChatGPT 4
"In this simplified version, the code calculates the sample interval based on the file size and then uses a single loop to handle both the small and large file cases. The last sample is always taken from the end of the file."
This commit is contained in:
Jamie Pine
2023-03-17 20:11:08 -07:00
parent 3b8c866cbc
commit 3ddd83000e

View File

@@ -18,32 +18,27 @@ async fn read_at(file: &mut File, offset: u64, size: u64) -> Result<Vec<u8>, io:
}
// NOTE(review): this span looks like a diff hunk (see the `@@ -18,32 +18,27 @@` header above)
// whose +/- markers were lost, so statements from the pre- and post-refactor versions are
// interleaved (two `let mut id` bindings, two `Ok(..)` tails, an `if` with no closer of its
// own). It does not compile as shown; reconstruct one side of the diff before reusing it.
/// Generates the CAS id for the file at `path`: a hex digest truncated to 16 characters.
///
/// The hasher is first fed `size` as little-endian bytes, then byte ranges of the file
/// obtained through `read_at`. `size` is expected to be the file's length in bytes; this
/// function does not verify that against the opened file.
///
/// # Errors
///
/// Returns the `io::Error` from opening the file or from any `read_at` call.
pub async fn generate_cas_id(path: impl AsRef<Path>, size: u64) -> Result<String, io::Error> {
// open file reference
let mut file = File::open(path).await?;
let mut hasher = Hasher::new();
// include the file size in the checksum
hasher.update(&size.to_le_bytes());
// if size is small enough, just read the whole thing
// NOTE(review): presumably the pre-refactor small-file branch: one read of `size` bytes
// from offset 0 whenever `size` is below SAMPLE_COUNT * SAMPLE_SIZE.
if SAMPLE_COUNT * SAMPLE_SIZE > size {
let buf = read_at(&mut file, 0, size).await?;
hasher.update(&buf);
// Sampling stride: `size` itself when the file is smaller than SAMPLE_COUNT * SAMPLE_SIZE,
// otherwise `size / SAMPLE_COUNT`.
let sample_interval = if SAMPLE_COUNT * SAMPLE_SIZE > size {
size
} else {
// loop over samples
// NOTE(review): presumably the pre-refactor sampling loop: SAMPLE_COUNT reads of
// SAMPLE_SIZE bytes, spaced `size / SAMPLE_COUNT` apart.
for i in 0..SAMPLE_COUNT {
let buf = read_at(&mut file, (size / SAMPLE_COUNT) * i, SAMPLE_SIZE).await?;
hasher.update(&buf);
}
// sample end of file
let buf = read_at(&mut file, size - SAMPLE_SIZE, SAMPLE_SIZE).await?;
size / SAMPLE_COUNT
};
// `0..=SAMPLE_COUNT` is inclusive, so this runs SAMPLE_COUNT + 1 times; the final
// iteration (i == SAMPLE_COUNT) takes the sample from the end of the file.
for i in 0..=SAMPLE_COUNT {
let offset = if i == SAMPLE_COUNT {
// NOTE(review): unsigned subtraction — underflows when `size < SAMPLE_SIZE` (panic with
// overflow checks on, wrap-around otherwise). SAMPLE_SIZE is defined outside this view;
// confirm small files cannot reach this line.
size - SAMPLE_SIZE
} else {
// NOTE(review): in the small-file case the stride equals `size`, so every i >= 1 targets
// an offset at or past `size`; what `read_at` returns there is not visible here — verify.
sample_interval * i
};
let buf = read_at(&mut file, offset, SAMPLE_SIZE).await?;
hasher.update(&buf);
}
let hex = hasher.finalize().to_hex();
let mut id = hex.to_string();
let mut id = hasher.finalize().to_hex();
// keep only the first 16 characters of the hex digest
id.truncate(16);
Ok(id)
Ok(id.to_string())
}