replace sha2 with blake2 for duplicates

This commit is contained in:
Sam Hadow 2025-04-08 21:52:02 +02:00
parent 552ced69e1
commit 6966b141da
3 changed files with 12 additions and 11 deletions

View File

@ -1,13 +1,13 @@
[package] [package]
name = "du-rust" name = "du-rust"
version = "0.1.0" version = "0.1.1"
edition = "2021" edition = "2021"
authors = ["Sam Hadow"] authors = ["Sam Hadow"]
[dependencies] [dependencies]
blake2 = "0.10.6"
clap = { version = "4.4.6", features = ["derive"] } clap = { version = "4.4.6", features = ["derive"] }
rayon = "1.8.0" rayon = "1.8.0"
sha2 = "0.10.8"
[profile.release] [profile.release]
strip = true strip = true

View File

@ -2,7 +2,7 @@
_pkgname=disk-usage _pkgname=disk-usage
pkgname=du-rust pkgname=du-rust
pkgver=0.1.0 pkgver=0.1.1
pkgrel=1 pkgrel=1
pkgdesc="Disk usage tool in rust." pkgdesc="Disk usage tool in rust."
arch=('x86_64') arch=('x86_64')

View File

@ -8,7 +8,7 @@ use std::fs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
// hash // hash
use sha2::{Digest, Sha256}; use blake2::{Blake2b512, Digest};
use std::io::{BufReader, Read}; use std::io::{BufReader, Read};
// parallelism // parallelism
@ -18,7 +18,7 @@ use std::sync::{Arc, Mutex};
impl FileTree { impl FileTree {
/// Find duplicates in a directory (including sub-directories). /// Find duplicates in a directory (including sub-directories).
/// ///
/// If the path exists in the tree, find duplicates using sha256. If 2 (or more) files have the same hash they're duplicates. /// If the path exists in the tree, find duplicates using Blake2b512. If 2 (or more) files have the same hash they're duplicates.
/// ///
/// returns a `Vec` containing a `Vec<PathBuf>` for each group of duplicates. /// returns a `Vec` containing a `Vec<PathBuf>` for each group of duplicates.
/// ///
@ -52,7 +52,7 @@ impl FileTree {
// parallelized loop // parallelized loop
intersection.par_iter().for_each(|item| { intersection.par_iter().for_each(|item| {
if let Ok(file) = fs::File::open(item) { if let Ok(file) = fs::File::open(item) {
let mut sha256 = Sha256::new(); let mut hasher = Blake2b512::new();
// sizable buffer // sizable buffer
let mut buffer = [0; 8192]; let mut buffer = [0; 8192];
@ -66,17 +66,18 @@ impl FileTree {
if count == 0 { if count == 0 {
break; break;
} }
sha256.update(&buffer[..count]); hasher.update(&buffer[..count]);
} }
let hash = sha256.finalize(); let hash = hasher.finalize();
let hash_bytes: [u8; 32] = hash.as_slice()[..32].try_into().unwrap();
// Use a Mutex to update HashMap in parallel // Use a Mutex to update HashMap in parallel
let mut locked_hashes = hashes.lock().unwrap(); let mut locked_hashes = hashes.lock().unwrap();
locked_hashes locked_hashes
.entry(hash.into()) .entry(hash_bytes)
.or_default() .or_default()
.push(item.clone()); .push(PathBuf::from(item));
} }
}); });
@ -100,7 +101,7 @@ impl FileTree {
} }
} }
/// Collisions shouldn't happen with sha256, but this method checks whether there is a collision among the duplicates found. /// Collisions shouldn't happen with blake2, but this method checks whether there is a collision among the duplicates found.
/// # Examples /// # Examples
/// ```no_run /// ```no_run
/// let p = Path::new("."); /// let p = Path::new(".");