From 0b41b34c45aa62b320221b2ac11780d394e43b3f Mon Sep 17 00:00:00 2001 From: Sam Hadow Date: Tue, 8 Apr 2025 22:03:49 +0200 Subject: [PATCH] blake 3 instead of blake 2 --- Cargo.toml | 1 + src/duplicates.rs | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 78a7b81..96714e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ authors = ["Sam Hadow"] [dependencies] blake2 = "0.10.6" +blake3 = "1.8.1" clap = { version = "4.4.6", features = ["derive"] } rayon = "1.8.0" diff --git a/src/duplicates.rs b/src/duplicates.rs index 5949706..b96231b 100644 --- a/src/duplicates.rs +++ b/src/duplicates.rs @@ -8,7 +8,7 @@ use std::fs; use std::path::{Path, PathBuf}; // hash -use blake2::{Blake2b512, Digest}; +use blake3::Hasher; use std::io::{BufReader, Read}; // parallelism @@ -18,7 +18,7 @@ use std::sync::{Arc, Mutex}; impl FileTree { /// Find duplicates in a directory (including sub-directories). /// - /// If path exist in tree, find duplicates using Blake2b512. If 2 (or more) files have the same hash they're duplicates. + /// If path exist in tree, find duplicates using Blake3. If 2 (or more) files have the same hash they're duplicates. /// /// returns a `Vec` containing a `Vec` for each group of duplicates. /// @@ -52,7 +52,7 @@ impl FileTree { // parallelized loop intersection.par_iter().for_each(|item| { if let Ok(file) = fs::File::open(item) { - let mut hasher = Blake2b512::new(); + let mut hasher = Hasher::new(); // sizable buffer let mut buffer = [0; 8192]; @@ -69,8 +69,9 @@ impl FileTree { hasher.update(&buffer[..count]); } - let hash = hasher.finalize(); - let hash_bytes: [u8; 32] = hash.as_slice()[..32].try_into().unwrap(); + let mut hash = hasher.finalize_xof(); + let mut hash_bytes: [u8; 32] = [0; 32]; + hash.fill(&mut hash_bytes); // Use a Mutex to update HashMap in parallel let mut locked_hashes = hashes.lock().unwrap();