//! Integration tests for dataset_hash //! //! Tests file hashing with various file sizes and edge cases. use std::fs; use tempfile::TempDir; // Import the crate use dataset_hash::{collect_files, hash_directory_batch, hash_directory_combined, hash_file}; #[test] fn test_hash_file_basic() { let temp = TempDir::new().unwrap(); let file_path = temp.path().join("test.txt"); fs::write(&file_path, "hello world").unwrap(); let hash1 = hash_file(&file_path).unwrap(); let hash2 = hash_file(&file_path).unwrap(); // Hash should be deterministic assert_eq!(hash1, hash2); // BLAKE3 produces 64-char hex strings assert_eq!(hash1.len(), 64); } #[test] fn test_hash_file_empty() { let temp = TempDir::new().unwrap(); let file_path = temp.path().join("empty.txt"); fs::write(&file_path, "").unwrap(); let hash = hash_file(&file_path).unwrap(); assert_eq!(hash.len(), 64); } #[test] fn test_hash_file_large() { let temp = TempDir::new().unwrap(); let file_path = temp.path().join("large.bin"); // Create 10MB file let data = vec![0u8; 10 * 1024 * 1024]; fs::write(&file_path, &data).unwrap(); let hash = hash_file(&file_path).unwrap(); assert_eq!(hash.len(), 64); } #[test] fn test_hash_file_different_content() { let temp = TempDir::new().unwrap(); let file1 = temp.path().join("file1.txt"); let file2 = temp.path().join("file2.txt"); fs::write(&file1, "content A").unwrap(); fs::write(&file2, "content B").unwrap(); let hash1 = hash_file(&file1).unwrap(); let hash2 = hash_file(&file2).unwrap(); assert_ne!(hash1, hash2); } #[test] fn test_collect_files_excludes_hidden() { let temp = TempDir::new().unwrap(); fs::write(temp.path().join("visible.txt"), "data").unwrap(); fs::write(temp.path().join(".hidden"), "data").unwrap(); // Create hidden directory and file inside it let hidden_dir = temp.path().join(".hidden_dir"); fs::create_dir(&hidden_dir).unwrap(); fs::write(hidden_dir.join("file.txt"), "data").unwrap(); let files = collect_files(temp.path()).unwrap(); assert_eq!(files.len(), 1); assert!(files[0].file_name().unwrap() == "visible.txt"); } #[test] fn test_collect_files_sorted() { let temp = TempDir::new().unwrap(); fs::write(temp.path().join("z.txt"), "z").unwrap(); fs::write(temp.path().join("a.txt"), "a").unwrap(); fs::write(temp.path().join("m.txt"), "m").unwrap(); let files = collect_files(temp.path()).unwrap(); assert_eq!(files.len(), 3); assert!(files[0].file_name().unwrap() == "a.txt"); assert!(files[1].file_name().unwrap() == "m.txt"); assert!(files[2].file_name().unwrap() == "z.txt"); } #[test] fn test_hash_directory_batch() { let temp = TempDir::new().unwrap(); fs::write(temp.path().join("a.txt"), "AAA").unwrap(); fs::write(temp.path().join("b.txt"), "BBB").unwrap(); let pairs = hash_directory_batch(temp.path()).unwrap(); assert_eq!(pairs.len(), 2); // Verify each file has a hash for (path, hash) in &pairs { assert!(path.ends_with(".txt")); assert_eq!(hash.len(), 64); } } #[test] fn test_hash_directory_combined() { let temp = TempDir::new().unwrap(); fs::write(temp.path().join("a.txt"), "AAA").unwrap(); fs::write(temp.path().join("b.txt"), "BBB").unwrap(); let hash1 = hash_directory_combined(temp.path()).unwrap(); let hash2 = hash_directory_combined(temp.path()).unwrap(); // Combined hash should be deterministic assert_eq!(hash1, hash2); assert_eq!(hash1.len(), 64); } #[test] fn test_hash_directory_combined_changes_with_content() { let temp = TempDir::new().unwrap(); fs::write(temp.path().join("file.txt"), "content").unwrap(); let hash1 = hash_directory_combined(temp.path()).unwrap(); // Modify file fs::write(temp.path().join("file.txt"), "modified").unwrap(); let hash2 = hash_directory_combined(temp.path()).unwrap(); assert_ne!(hash1, hash2); } #[test] fn test_collect_files_no_symlinks() { let temp = TempDir::new().unwrap(); let real_file = temp.path().join("real.txt"); let symlink = temp.path().join("link.txt"); fs::write(&real_file, "data").unwrap(); #[cfg(unix)] std::os::unix::fs::symlink(&real_file, &symlink).unwrap(); let files = collect_files(temp.path()).unwrap(); // Should only include the real file, not the symlink assert_eq!(files.len(), 1); } #[test] fn test_hash_directory_empty() { let temp = TempDir::new().unwrap(); let pairs = hash_directory_batch(temp.path()).unwrap(); assert!(pairs.is_empty()); let combined = hash_directory_combined(temp.path()).unwrap(); // Empty directory should still produce a valid hash assert_eq!(combined.len(), 64); } #[test] fn test_hash_directory_nested() { let temp = TempDir::new().unwrap(); let subdir = temp.path().join("subdir"); fs::create_dir(&subdir).unwrap(); fs::write(temp.path().join("root.txt"), "root").unwrap(); fs::write(subdir.join("nested.txt"), "nested").unwrap(); let pairs = hash_directory_batch(temp.path()).unwrap(); assert_eq!(pairs.len(), 2); }