mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2026-05-24 16:32:45 -04:00
[Refactor] CAS id generation code simplify
Credit: ChatGPT 4 "In this simplified version, the code calculates the sample interval based on the file size and then uses a single loop to handle both the small and large file cases. The last sample is always taken from the end of the file."
This commit is contained in:
@@ -18,32 +18,27 @@ async fn read_at(file: &mut File, offset: u64, size: u64) -> Result<Vec<u8>, io:
|
||||
}
|
||||
|
||||
pub async fn generate_cas_id(path: impl AsRef<Path>, size: u64) -> Result<String, io::Error> {
|
||||
// open file reference
|
||||
let mut file = File::open(path).await?;
|
||||
|
||||
let mut hasher = Hasher::new();
|
||||
|
||||
// include the file size in the checksum
|
||||
hasher.update(&size.to_le_bytes());
|
||||
|
||||
// if size is small enough, just read the whole thing
|
||||
|
||||
if SAMPLE_COUNT * SAMPLE_SIZE > size {
|
||||
let buf = read_at(&mut file, 0, size).await?;
|
||||
hasher.update(&buf);
|
||||
let sample_interval = if SAMPLE_COUNT * SAMPLE_SIZE > size {
|
||||
size
|
||||
} else {
|
||||
// loop over samples
|
||||
for i in 0..SAMPLE_COUNT {
|
||||
let buf = read_at(&mut file, (size / SAMPLE_COUNT) * i, SAMPLE_SIZE).await?;
|
||||
hasher.update(&buf);
|
||||
}
|
||||
// sample end of file
|
||||
let buf = read_at(&mut file, size - SAMPLE_SIZE, SAMPLE_SIZE).await?;
|
||||
size / SAMPLE_COUNT
|
||||
};
|
||||
|
||||
for i in 0..=SAMPLE_COUNT {
|
||||
let offset = if i == SAMPLE_COUNT {
|
||||
size - SAMPLE_SIZE
|
||||
} else {
|
||||
sample_interval * i
|
||||
};
|
||||
let buf = read_at(&mut file, offset, SAMPLE_SIZE).await?;
|
||||
hasher.update(&buf);
|
||||
}
|
||||
|
||||
let hex = hasher.finalize().to_hex();
|
||||
let mut id = hex.to_string();
|
||||
let mut id = hasher.finalize().to_hex();
|
||||
id.truncate(16);
|
||||
Ok(id)
|
||||
Ok(id.to_string())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user