Implements two production-ready Rust native libraries:

## dataset_hash (BLAKE3-based hashing)
- FFI exports: ds_hash_file, ds_hash_directory_batch, ds_hash_directory_combined
- BLAKE3 hashing for files and directory trees
- Hidden file filtering (respects .hidden and _prefix files)
- Prometheus-compatible metrics export
- Comprehensive integration tests (12 tests)
- Benchmarks: hash_file_1kb (~14µs), hash_file_1mb (~610µs), dir_100files (~1.6ms)

## queue_index (priority queue)
- FFI exports: 25+ functions matching C++ API
  - Lifecycle: qi_open, qi_close
  - Task ops: add_tasks, update_tasks, remove_tasks, get_task_by_id
  - Queue ops: get_next_batch, peek_next, mark_completed
  - Priority: get_next_priority_task, peek_priority_task
  - Query: get_all_tasks, get_tasks_by_status, get_task_count
  - Retry/DLQ: retry_task, move_to_dlq
  - Lease: renew_lease, release_lease
  - Maintenance: rebuild_index, compact_index
- BinaryHeap-based priority queue with correct Ord (max-heap)
- Memory-mapped storage with safe Rust wrappers
- Panic-safe FFI boundaries using catch_unwind
- Comprehensive integration tests (7 tests, 1 ignored for persistence)
- Benchmarks: add_100 (~60µs), get_10 (~24ns), priority (~5µs)

## Architecture
- Cargo workspace with shared common crate
- Criterion benchmarks for both crates
- Rust 1.85.0 toolchain pinned
- Zero compiler warnings
- All 19 tests passing

Compare: make compare-benchmarks (Rust/Go/C++ comparison)
177 lines
5.2 KiB
Rust
177 lines
5.2 KiB
Rust
//! Integration tests for dataset_hash
|
|
//!
|
|
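//! Covers single-file hashing across file sizes, file collection (hidden
//! entries, ordering, symlinks), and batch/combined directory hashing,
//! including edge cases such as empty files and empty directories.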
|
|
|
|
use std::fs;
|
|
use tempfile::TempDir;
|
|
|
|
// Import the crate
|
|
use dataset_hash::{collect_files, hash_directory_batch, hash_directory_combined, hash_file};
|
|
|
|
/// Hashes one small file twice and checks that both digests agree and are
/// 64 characters long.
#[test]
fn test_hash_file_basic() {
    let workdir = TempDir::new().unwrap();
    let target = workdir.path().join("test.txt");
    fs::write(&target, "hello world").unwrap();

    // Two independent passes over the same, unchanged file.
    let digests = [hash_file(&target).unwrap(), hash_file(&target).unwrap()];

    // Identical input must yield an identical digest.
    assert_eq!(digests[0], digests[1]);
    // The digest is expected to be a 64-character string (hex-encoded BLAKE3).
    assert_eq!(digests[0].len(), 64);
}
|
|
|
|
/// A zero-byte file must still hash successfully to a 64-character digest.
#[test]
fn test_hash_file_empty() {
    let workdir = TempDir::new().unwrap();
    let empty_path = workdir.path().join("empty.txt");
    // Writing an empty string creates the file with no content.
    fs::write(&empty_path, "").unwrap();

    let digest = hash_file(&empty_path).unwrap();
    assert_eq!(digest.len(), 64);
}
|
|
|
|
/// Hashes a 10 MiB zero-filled file and checks the digest length.
#[test]
fn test_hash_file_large() {
    // Size of the generated fixture: 10 MiB.
    const LARGE_FILE_BYTES: usize = 10 * 1024 * 1024;

    let workdir = TempDir::new().unwrap();
    let big_path = workdir.path().join("large.bin");

    let payload = vec![0u8; LARGE_FILE_BYTES];
    fs::write(&big_path, &payload).unwrap();

    let digest = hash_file(&big_path).unwrap();
    assert_eq!(digest.len(), 64);
}
|
|
|
|
/// Two files whose contents differ must not share a digest.
#[test]
fn test_hash_file_different_content() {
    let workdir = TempDir::new().unwrap();
    let path_a = workdir.path().join("file1.txt");
    let path_b = workdir.path().join("file2.txt");

    // Create both fixtures before hashing either of them.
    for (path, body) in [(&path_a, "content A"), (&path_b, "content B")] {
        fs::write(path, body).unwrap();
    }

    let digest_a = hash_file(&path_a).unwrap();
    let digest_b = hash_file(&path_b).unwrap();

    assert_ne!(digest_a, digest_b);
}
|
|
|
|
/// `collect_files` is expected to return only `visible.txt` when the directory
/// also holds a dot-file and a dot-directory with a regular file inside it.
#[test]
fn test_collect_files_excludes_hidden() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();

    // One visible file and one dot-file at the top level.
    for name in ["visible.txt", ".hidden"] {
        fs::write(root.join(name), "data").unwrap();
    }

    // A dot-directory whose (non-dot) child must be skipped as well.
    let dot_dir = root.join(".hidden_dir");
    fs::create_dir(&dot_dir).unwrap();
    fs::write(dot_dir.join("file.txt"), "data").unwrap();

    let collected = collect_files(root).unwrap();
    assert_eq!(collected.len(), 1);

    let only_entry = &collected[0];
    assert!(only_entry.file_name().unwrap() == "visible.txt");
}
|
|
|
|
/// Files created in `z`, `a`, `m` order are expected back from `collect_files`
/// in `a`, `m`, `z` order.
#[test]
fn test_collect_files_sorted() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();

    // Deliberately create the files out of alphabetical order.
    for (name, body) in [("z.txt", "z"), ("a.txt", "a"), ("m.txt", "m")] {
        fs::write(root.join(name), body).unwrap();
    }

    let collected = collect_files(root).unwrap();
    assert_eq!(collected.len(), 3);

    // Compare each returned entry against the expected name at that position.
    for (entry, expected) in collected.iter().zip(["a.txt", "m.txt", "z.txt"]) {
        assert!(entry.file_name().unwrap() == expected);
    }
}
|
|
|
|
/// `hash_directory_batch` on a two-file directory must yield two entries, each
/// with a path ending in `.txt` and a 64-character digest.
#[test]
fn test_hash_directory_batch() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();
    fs::write(root.join("a.txt"), "AAA").unwrap();
    fs::write(root.join("b.txt"), "BBB").unwrap();

    let entries = hash_directory_batch(root).unwrap();
    assert_eq!(entries.len(), 2);

    // Every entry pairs a file path with that file's digest.
    for (file, digest) in &entries {
        assert!(file.ends_with(".txt"));
        assert_eq!(digest.len(), 64);
    }
}
|
|
|
|
/// The combined directory digest must be stable across calls on an unchanged
/// directory and 64 characters long.
#[test]
fn test_hash_directory_combined() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();
    fs::write(root.join("a.txt"), "AAA").unwrap();
    fs::write(root.join("b.txt"), "BBB").unwrap();

    let first_pass = hash_directory_combined(root).unwrap();
    let second_pass = hash_directory_combined(root).unwrap();

    // Nothing changed between the two passes, so the digests must match.
    assert_eq!(first_pass, second_pass);
    assert_eq!(first_pass.len(), 64);
}
|
|
|
|
/// Rewriting a file's content must change the directory's combined digest.
#[test]
fn test_hash_directory_combined_changes_with_content() {
    let workdir = TempDir::new().unwrap();
    let tracked = workdir.path().join("file.txt");
    // Takes a fresh combined digest of the directory each time it is called.
    let snapshot = || hash_directory_combined(workdir.path()).unwrap();

    fs::write(&tracked, "content").unwrap();
    let before = snapshot();

    // Overwrite the same file with different bytes.
    fs::write(&tracked, "modified").unwrap();
    let after = snapshot();

    assert_ne!(before, after);
}
|
|
|
|
/// `collect_files` must return the real file and skip a symlink pointing at it.
///
/// The symlink is only created on unix targets; elsewhere the test still runs
/// and checks that the single regular file is collected.
#[test]
fn test_collect_files_no_symlinks() {
    let temp = TempDir::new().unwrap();
    let real_file = temp.path().join("real.txt");

    fs::write(&real_file, "data").unwrap();

    // Keep the link path inside the cfg-gated block so non-unix builds do not
    // end up with an unused variable.
    #[cfg(unix)]
    {
        let symlink = temp.path().join("link.txt");
        std::os::unix::fs::symlink(&real_file, &symlink).unwrap();
    }

    let files = collect_files(temp.path()).unwrap();
    // Should only include the real file, not the symlink
    assert_eq!(files.len(), 1);
    // Checking the name guards against the link being returned in place of
    // the real file, which a bare length check would not catch.
    assert!(files[0].file_name().unwrap() == "real.txt");
}
|
|
|
|
/// An empty directory yields no batch entries but still gets a 64-character
/// combined digest.
#[test]
fn test_hash_directory_empty() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();

    // No files were created, so the per-file listing must be empty.
    let entries = hash_directory_batch(root).unwrap();
    assert!(entries.is_empty());

    // The combined digest must still be produced for an empty directory.
    let combined_digest = hash_directory_combined(root).unwrap();
    assert_eq!(combined_digest.len(), 64);
}
|
|
|
|
/// `hash_directory_batch` must report both a top-level file and a file inside
/// a subdirectory.
#[test]
fn test_hash_directory_nested() {
    let workdir = TempDir::new().unwrap();
    let root = workdir.path();

    let child_dir = root.join("subdir");
    fs::create_dir(&child_dir).unwrap();

    // One file at the root, one a level below it.
    fs::write(root.join("root.txt"), "root").unwrap();
    fs::write(child_dir.join("nested.txt"), "nested").unwrap();

    let entries = hash_directory_batch(root).unwrap();
    assert_eq!(entries.len(), 2);
}
|