Security Fixes: - CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries() Prevents symlink attacks on predictable .tmp file paths - CVE-2025-47290: Use openat_nofollow() in storage_open() Closes TOCTOU race condition via path_sanitizer infrastructure - CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks() Prevents integer overflow in batch operations Research Trustworthiness (dataset_hash): - Deterministic file ordering: std::sort after collect_files() - Recursive directory traversal: depth-limited with cycle detection - Documented exclusions: hidden files and special files noted in API Bug Fixes: - R1: storage_init path validation for non-existent directories - R2: safe_strncpy return value check before strcat - R3: parallel_hash 256-file cap replaced with std::vector - R4: wire qi_compact_index/qi_rebuild_index stubs - R5: CompletionLatch race condition fix (hold mutex during decrement) - R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32) - R7: fuzz_index_storage header format fix - R8: enforce null termination in add_tasks/update_tasks - R9: use 64 bytes (not 65) in combined hash to exclude null terminator - R10: status field persistence in save() New Tests: - test_recursive_dataset.cpp: Verify deterministic recursive hashing - test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix - test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix - test_sha256_arm_kat.cpp: ARMv8 known-answer tests - test_storage_init_new_dir.cpp: F1 verification - test_parallel_hash_large_dir.cpp: F3 verification - test_queue_index_compact.cpp: F4 verification All 8 native tests passing. Library ready for research lab deployment.
94 lines
2.8 KiB
C++
// test_parallel_hash_large_dir.cpp - Verify parallel_hash handles >256 files
// Validates F3 fix: no truncation, correct combined hash for large directories

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <sys/stat.h>
#include <fcntl.h>

#include "../dataset_hash/threading/parallel_hash.h"

// Create a test file at `path` whose content embeds `content_id`, so every
// generated file is unique and contributes a distinct leaf to the combined
// directory hash.
//
// Returns true only if the entire payload was written and the descriptor
// closed cleanly; a partial write or deferred I/O error reports failure.
static bool create_test_file(const char* path, int content_id) {
    int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fd < 0) return false;

    // Unique content per file keeps the directory hash sensitive to each file.
    char buf[64];
    int len = snprintf(buf, sizeof(buf), "Test file content %d\n", content_id);
    if (len < 0 || (size_t)len >= sizeof(buf)) {
        close(fd);
        return false;
    }

    // Bug fix: the original ignored write()'s return value, so a failed or
    // short write was silently reported as success.
    bool ok = (write(fd, buf, (size_t)len) == (ssize_t)len);

    // close() can surface deferred write errors; propagate those as well.
    if (close(fd) != 0) ok = false;
    return ok;
}

int main() {
|
|
// Create a temporary directory
|
|
char tmpdir[] = "/tmp/test_large_dir_XXXXXX";
|
|
if (!mkdtemp(tmpdir)) {
|
|
printf("FAIL: Could not create temp directory\n");
|
|
return 1;
|
|
}
|
|
|
|
// Create 300 test files (more than old 256 limit)
|
|
const int num_files = 300;
|
|
for (int i = 0; i < num_files; i++) {
|
|
char path[256];
|
|
snprintf(path, sizeof(path), "%s/file_%04d.txt", tmpdir, i);
|
|
if (!create_test_file(path, i)) {
|
|
printf("FAIL: Could not create test file %d\n", i);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
printf("Created %d test files in %s\n", num_files, tmpdir);
|
|
|
|
// Initialize parallel hasher
|
|
ParallelHasher hasher;
|
|
if (!parallel_hasher_init(&hasher, 4, 64*1024)) {
|
|
printf("FAIL: Could not initialize parallel hasher\n");
|
|
return 1;
|
|
}
|
|
|
|
// Hash the directory
|
|
char combined_hash[65];
|
|
int result = parallel_hash_directory(&hasher, tmpdir, combined_hash);
|
|
|
|
if (result != 0) {
|
|
printf("FAIL: parallel_hash_directory returned %d\n", result);
|
|
parallel_hasher_cleanup(&hasher);
|
|
return 1;
|
|
}
|
|
|
|
printf("Combined hash: %s\n", combined_hash);
|
|
|
|
// Verify hash is valid (64 hex chars)
|
|
if (strlen(combined_hash) != 64) {
|
|
printf("FAIL: Hash length is %zu, expected 64\n", strlen(combined_hash));
|
|
parallel_hasher_cleanup(&hasher);
|
|
return 1;
|
|
}
|
|
|
|
for (int i = 0; i < 64; i++) {
|
|
char c = combined_hash[i];
|
|
if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'))) {
|
|
printf("FAIL: Invalid hex char '%c' at position %d\n", c, i);
|
|
parallel_hasher_cleanup(&hasher);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
// Cleanup
|
|
parallel_hasher_cleanup(&hasher);
|
|
|
|
// Remove test files
|
|
for (int i = 0; i < num_files; i++) {
|
|
char path[256];
|
|
snprintf(path, sizeof(path), "%s/file_%04d.txt", tmpdir, i);
|
|
unlink(path);
|
|
}
|
|
rmdir(tmpdir);
|
|
|
|
printf("PASS: parallel_hash handles %d files without truncation (F3 fix verified)\n", num_files);
|
|
return 0;
|
|
}
|