diff --git a/Cargo.lock b/Cargo.lock index dd874bb85..5250160d4 100644 Binary files a/Cargo.lock and b/Cargo.lock differ diff --git a/core/Cargo.toml b/core/Cargo.toml index 3ad5da6a2..c0889def6 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -28,11 +28,10 @@ serde = { version = "1.0", features = ["derive"] } chrono = { version = "0.4.22", features = ["serde"] } serde_json = "1.0" futures = "0.3" -data-encoding = "2.3.2" -ring = "0.17.0-alpha.11" int-enum = "0.4.0" rmp = "^0.8.11" rmp-serde = "^1.1.1" +blake3 = "1.3.1" # Project dependencies rspc = { workspace = true, features = ["uuid", "chrono", "tracing"] } diff --git a/core/prisma/schema.prisma b/core/prisma/schema.prisma index e05e3d6a8..a6cd7f97d 100644 --- a/core/prisma/schema.prisma +++ b/core/prisma/schema.prisma @@ -98,9 +98,9 @@ model Location { model Object { id Int @id @default(autoincrement()) - // content addressable storage id - sha256 sampled checksum + // content addressable storage id - blake3 sampled checksum cas_id String @unique - // full byte contents digested into sha256 checksum + // full byte contents digested into blake3 checksum integrity_checksum String? @unique // basic metadata name String? diff --git a/core/src/object/cas.rs b/core/src/object/cas.rs index a671ecea3..58f6858de 100644 --- a/core/src/object/cas.rs +++ b/core/src/object/cas.rs @@ -1,5 +1,4 @@ -use data_encoding::HEXLOWER; -use ring::digest::{Context, SHA256}; +use blake3::Hasher; use std::path::PathBuf; use tokio::{ fs::File, @@ -18,51 +17,54 @@ async fn read_at(file: &mut File, offset: u64, size: u64) -> Result, io: Ok(buf) } +fn to_hex_string(b: &[u8]) -> String { + b.iter().map(|c| format!("{:02x}", c)).collect::() +} + pub async fn generate_cas_id(path: PathBuf, size: u64) -> Result { // open file reference let mut file = File::open(path).await?; - let mut context = Context::new(&SHA256); + let mut hasher = Hasher::new(); // include the file size in the checksum - context.update(&size.to_le_bytes()); + hasher.update(&size.to_le_bytes()); // if size is small enough, just read the whole thing + if SAMPLE_COUNT * SAMPLE_SIZE > size { let buf = read_at(&mut file, 0, size).await?; - context.update(&buf); + hasher.update(&buf); } else { // loop over samples for i in 0..SAMPLE_COUNT { let buf = read_at(&mut file, (size / SAMPLE_COUNT) * i, SAMPLE_SIZE).await?; - context.update(&buf); + hasher.update(&buf); } // sample end of file let buf = read_at(&mut file, size - SAMPLE_SIZE, SAMPLE_SIZE).await?; - context.update(&buf); + hasher.update(&buf); } - let digest = context.finish(); - let hex = HEXLOWER.encode(digest.as_ref()); + let hex = to_hex_string(hasher.finalize().as_bytes()); Ok(hex) } -// pub fn full_checksum(path: &str) -> Result { -// // read file as buffer and convert to digest -// let mut reader = BufReader::new(File::open(path).unwrap()); -// let mut context = Context::new(&SHA256); -// let mut buffer = [0; 1024]; -// loop { -// let count = reader.read(&mut buffer)?; -// if count == 0 { -// break; -// } -// context.update(&buffer[..count]); -// } -// let digest = context.finish(); -// // create a lowercase hash from -// let hex = HEXLOWER.encode(digest.as_ref()); +pub async fn full_checksum(path: &str) -> Result { + const BLOCK_SIZE: usize = 1048576; + //read file as buffer and convert to digest + let mut reader = File::open(path).await?; + let mut context = Hasher::new(); + let mut buffer = [0; 1048576]; + loop { + let read_count = reader.read(&mut buffer).await?; + context.update(&buffer[..read_count]); + if read_count != BLOCK_SIZE { + break; + } + } + let hex = to_hex_string(context.finalize().as_bytes()); -// Ok(hex) -// } + Ok(hex) +}