Security Fixes: - CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries() Prevents symlink attacks on predictable .tmp file paths - CVE-2025-47290: Use openat_nofollow() in storage_open() Closes TOCTOU race condition via path_sanitizer infrastructure - CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks() Prevents integer overflow in batch operations Research Trustworthiness (dataset_hash): - Deterministic file ordering: std::sort after collect_files() - Recursive directory traversal: depth-limited with cycle detection - Documented exclusions: hidden files and special files noted in API Bug Fixes: - R1: storage_init path validation for non-existent directories - R2: safe_strncpy return value check before strcat - R3: parallel_hash 256-file cap replaced with std::vector - R4: wire qi_compact_index/qi_rebuild_index stubs - R5: CompletionLatch race condition fix (hold mutex during decrement) - R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32) - R7: fuzz_index_storage header format fix - R8: enforce null termination in add_tasks/update_tasks - R9: use 64 bytes (not 65) in combined hash to exclude null terminator - R10: status field persistence in save() New Tests: - test_recursive_dataset.cpp: Verify deterministic recursive hashing - test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix - test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix - test_sha256_arm_kat.cpp: ARMv8 known-answer tests - test_storage_init_new_dir.cpp: F1 verification - test_parallel_hash_large_dir.cpp: F3 verification - test_queue_index_compact.cpp: F4 verification All 8 native tests passing. Library ready for research lab deployment.
106 lines
2.6 KiB
C++
106 lines
2.6 KiB
C++
#include "file_hash.h"

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
|
|
|
|
int hash_file(const char* path, size_t buffer_size, char* out_hash) {
|
|
if (!path || !out_hash) return -1;
|
|
|
|
int fd = open(path, O_RDONLY | O_CLOEXEC);
|
|
if (fd < 0) {
|
|
return -1;
|
|
}
|
|
|
|
struct stat st;
|
|
if (fstat(fd, &st) < 0) {
|
|
close(fd);
|
|
return -1;
|
|
}
|
|
|
|
Sha256State hasher;
|
|
sha256_init(&hasher);
|
|
|
|
if (st.st_size == 0) {
|
|
// Empty file
|
|
uint8_t result[32];
|
|
sha256_finalize(&hasher, result);
|
|
close(fd);
|
|
// Convert to hex
|
|
static const char hex[] = "0123456789abcdef";
|
|
for (int i = 0; i < 32; i++) {
|
|
out_hash[i*2] = hex[(result[i] >> 4) & 0xf];
|
|
out_hash[i*2+1] = hex[result[i] & 0xf];
|
|
}
|
|
out_hash[64] = '\0';
|
|
return 0;
|
|
}
|
|
|
|
// Try memory map first
|
|
void* mapped = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
|
if (mapped != MAP_FAILED) {
|
|
sha256_update(&hasher, (const uint8_t*)mapped, st.st_size);
|
|
munmap(mapped, st.st_size);
|
|
} else {
|
|
// Fallback to buffered read
|
|
uint8_t* buffer = (uint8_t*)malloc(buffer_size);
|
|
if (!buffer) {
|
|
close(fd);
|
|
return -1;
|
|
}
|
|
ssize_t n;
|
|
while ((n = read(fd, buffer, buffer_size)) > 0) {
|
|
sha256_update(&hasher, buffer, n);
|
|
}
|
|
free(buffer);
|
|
}
|
|
|
|
close(fd);
|
|
|
|
uint8_t result[32];
|
|
sha256_finalize(&hasher, result);
|
|
|
|
// Convert to hex
|
|
static const char hex[] = "0123456789abcdef";
|
|
for (int i = 0; i < 32; i++) {
|
|
out_hash[i*2] = hex[(result[i] >> 4) & 0xf];
|
|
out_hash[i*2+1] = hex[result[i] & 0xf];
|
|
}
|
|
out_hash[64] = '\0';
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Hash a single file, allocating result buffer
|
|
char* hash_file_alloc(const char* path, size_t buffer_size) {
|
|
char* out_hash = (char*)malloc(65); // 64 hex + null
|
|
if (!out_hash) return nullptr;
|
|
|
|
if (hash_file(path, buffer_size, out_hash) != 0) {
|
|
free(out_hash);
|
|
return nullptr;
|
|
}
|
|
return out_hash;
|
|
}
|
|
|
|
int hash_files_batch(
|
|
const char* const* paths,
|
|
uint32_t count,
|
|
char** out_hashes,
|
|
size_t buffer_size) {
|
|
|
|
if (!paths || !out_hashes) return -1;
|
|
|
|
int all_success = 1;
|
|
|
|
for (uint32_t i = 0; i < count; ++i) {
|
|
out_hashes[i] = hash_file_alloc(paths[i], buffer_size);
|
|
if (out_hashes[i] == nullptr) {
|
|
all_success = 0;
|
|
}
|
|
}
|
|
|
|
return all_success ? 0 : -1;
|
|
}
|