From 4cd2dde35c723d64fd621d73db37d261a7cf9c97 Mon Sep 17 00:00:00 2001 From: jake <77554505+brxken128@users.noreply.github.com> Date: Thu, 13 Oct 2022 23:31:47 +0100 Subject: [PATCH] CAS ID Improvements (#413) * remove `ring` dependency and use `sha2` instead * use BLAKE3 and include full file checksum * update schema comments --- Cargo.lock | Bin 179166 -> 179472 bytes core/Cargo.toml | 3 +-- core/prisma/schema.prisma | 4 +-- core/src/object/cas.rs | 54 ++++++++++++++++++++------------------ 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd874bb855c68cb770f6d1ed6f50e8be868cb8be..5250160d46835c25154b5ac74648dd029cc00160 100644 GIT binary patch delta 506 zcmY+AJ!@1!6o#3NyTTebf|9tJBE~|Cpfh*o%txdC17R)2UCzv$ff$XryNK!3%5=&t zuGr=eC<}YB5p7ZlM$)Corpa#*1&MbT8_Sn-c+PX)!=KGJzndTS=TG*R@2^xT9HKtgbqqs+J^l3eo`LFF&!+B4oW3dNuM4DZ;8 zkVUP7;YLt}O_-q|2s(0W69r{FILT$^Gf)Yrf=050pcJU6bGCeOv_Jp)$|>9+S)6~) zD#_|3$P@rUEAIvPL^@TD+Q+GTmT^cTIboelE^r|gB;P+ufP&UN85yYpv`rOD(C_ delta 377 zcmYL_Jxjwt0EW3p!PXj-7IjdlIOx)FpSc_e`U9LQb(Kr*qJpFj_6HOfMFl0pO-@#4 zp~6uYx4KrGm39z+fP>(oc8TM&JnzGM@m+ZPD%`ykJDcwDVkd`^rSzvhXErXKTIZ%d zR>@a&5;fFbvXj0uxlXjkM39I|fhZO#jzb!16(bQ-<;yU}Tm_O790if4B7g)k#RIGv z#lB)ZZ7s|mwVSrx1dq$(73d+~!_Y=oU}8Q)Gd%@Lw$%k&uDS1lQl5LTD)$d|W!itJ ztx?9AL|i})1r$uMP)imIA(YPrp`7ptYseXwNQWXILdJ|Sfp~y;h?yClJ2STN1d>9w zXkVYfwws|@9e@+lAAs_tl85sDg5gowd=5aj^!LXoJ`3{@vbRH! Result, io: Ok(buf) } +fn to_hex_string(b: &[u8]) -> String { + b.iter().map(|c| format!("{:02x}", c)).collect::() +} + pub async fn generate_cas_id(path: PathBuf, size: u64) -> Result { // open file reference let mut file = File::open(path).await?; - let mut context = Context::new(&SHA256); + let mut hasher = Hasher::new(); // include the file size in the checksum - context.update(&size.to_le_bytes()); + hasher.update(&size.to_le_bytes()); // if size is small enough, just read the whole thing + if SAMPLE_COUNT * SAMPLE_SIZE > size { let buf = read_at(&mut file, 0, size).await?; - context.update(&buf); + hasher.update(&buf); } else { // loop over samples for i in 0..SAMPLE_COUNT { let buf = read_at(&mut file, (size / SAMPLE_COUNT) * i, SAMPLE_SIZE).await?; - context.update(&buf); + hasher.update(&buf); } // sample end of file let buf = read_at(&mut file, size - SAMPLE_SIZE, SAMPLE_SIZE).await?; - context.update(&buf); + hasher.update(&buf); } - let digest = context.finish(); - let hex = HEXLOWER.encode(digest.as_ref()); + let hex = to_hex_string(hasher.finalize().as_bytes()); Ok(hex) } -// pub fn full_checksum(path: &str) -> Result { -// // read file as buffer and convert to digest -// let mut reader = BufReader::new(File::open(path).unwrap()); -// let mut context = Context::new(&SHA256); -// let mut buffer = [0; 1024]; -// loop { -// let count = reader.read(&mut buffer)?; -// if count == 0 { -// break; -// } -// context.update(&buffer[..count]); -// } -// let digest = context.finish(); -// // create a lowercase hash from -// let hex = HEXLOWER.encode(digest.as_ref()); +pub async fn full_checksum(path: &str) -> Result { + const BLOCK_SIZE: usize = 1048576; + //read file as buffer and convert to digest + let mut reader = File::open(path).await?; + let mut context = Hasher::new(); + let mut buffer = [0; 1048576]; + loop { + let read_count = reader.read(&mut buffer).await?; + context.update(&buffer[..read_count]); + if read_count != BLOCK_SIZE { + break; + } + } + let hex = to_hex_string(context.finalize().as_bytes()); -// Ok(hex) -// } + Ok(hex) +}