Security Fixes:
- CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries()
  Prevents symlink attacks on predictable .tmp file paths
- CVE-2025-47290: Use openat_nofollow() in storage_open()
  Closes TOCTOU race condition via path_sanitizer infrastructure
- CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks()
  Prevents integer overflow in batch operations

Research Trustworthiness (dataset_hash):
- Deterministic file ordering: std::sort after collect_files()
- Recursive directory traversal: depth-limited with cycle detection
- Documented exclusions: hidden files and special files noted in API

Bug Fixes:
- R1: storage_init path validation for non-existent directories
- R2: safe_strncpy return value check before strcat
- R3: parallel_hash 256-file cap replaced with std::vector
- R4: wire qi_compact_index/qi_rebuild_index stubs
- R5: CompletionLatch race condition fix (hold mutex during decrement)
- R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32)
- R7: fuzz_index_storage header format fix
- R8: enforce null termination in add_tasks/update_tasks
- R9: use 64 bytes (not 65) in combined hash to exclude null terminator
- R10: status field persistence in save()

New Tests:
- test_recursive_dataset.cpp: Verify deterministic recursive hashing
- test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix
- test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix
- test_sha256_arm_kat.cpp: ARMv8 known-answer tests
- test_storage_init_new_dir.cpp: F1 verification
- test_parallel_hash_large_dir.cpp: F3 verification
- test_queue_index_compact.cpp: F4 verification

All 8 native tests passing. Library ready for research lab deployment.
180 lines
4.7 KiB
C++
180 lines
4.7 KiB
C++
#include <cassert>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <string>
|
|
#include <sys/stat.h>
|
|
#include <unistd.h>
|
|
#include <limits.h>
|
|
#include "../native/dataset_hash/dataset_hash.h"
|
|
|
|
// Returns the absolute path of the process's current working directory.
// Yields an empty string when getcwd() reports failure.
static std::string get_cwd() {
    char cwd_buf[PATH_MAX];
    const char* resolved = getcwd(cwd_buf, sizeof(cwd_buf));
    if (resolved == nullptr) {
        return "";
    }
    return std::string(cwd_buf);
}
|
|
|
|
// Test helper: create (or truncate) the file at `path` and write `content`
// to it verbatim.
//
// Returns 0 only when the file was opened, the content was written and the
// stream was closed without error; returns -1 otherwise. On a write or close
// failure a partially written file may remain at `path`, so callers that need
// a clean directory should unlink it.
static int create_file(const char* path, const char* content) {
    FILE* f = fopen(path, "w");
    if (!f) return -1;

    // fputs returns EOF on a write error and a non-negative value otherwise.
    const bool write_ok = fputs(content, f) >= 0;

    // Always close the stream, even after a failed write. fclose flushes the
    // buffered data, so its result is the only place a deferred write error
    // (e.g. ENOSPC) becomes visible.
    const bool close_ok = fclose(f) == 0;

    return (write_ok && close_ok) ? 0 : -1;
}
|
|
|
|
// Test: Recursive dataset hashing
|
|
// Verifies that nested directories are traversed and files are sorted
|
|
static int test_recursive_hashing() {
|
|
std::string cwd = get_cwd();
|
|
if (cwd.empty()) return -1;
|
|
|
|
char base_dir[4096];
|
|
snprintf(base_dir, sizeof(base_dir), "%s/test_recursive_XXXXXX", cwd.c_str());
|
|
|
|
if (mkdtemp(base_dir) == nullptr) return -1;
|
|
|
|
// Create nested structure
|
|
char subdir[4096];
|
|
char deeper[4096];
|
|
snprintf(subdir, sizeof(subdir), "%s/subdir", base_dir);
|
|
snprintf(deeper, sizeof(deeper), "%s/subdir/deeper", base_dir);
|
|
|
|
if (mkdir(subdir, 0755) != 0) {
|
|
rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
if (mkdir(deeper, 0755) != 0) {
|
|
rmdir(subdir);
|
|
rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
// Create files
|
|
char path_z[4096];
|
|
char path_b[4096];
|
|
char path_a[4096];
|
|
char path_deep[4096];
|
|
snprintf(path_z, sizeof(path_z), "%s/z_file.txt", base_dir);
|
|
snprintf(path_b, sizeof(path_b), "%s/subdir/b_file.txt", base_dir);
|
|
snprintf(path_a, sizeof(path_a), "%s/subdir/a_file.txt", base_dir);
|
|
snprintf(path_deep, sizeof(path_deep), "%s/subdir/deeper/deep_file.txt", base_dir);
|
|
|
|
if (create_file(path_z, "z content") != 0 ||
|
|
create_file(path_b, "b content") != 0 ||
|
|
create_file(path_a, "a content") != 0 ||
|
|
create_file(path_deep, "deep content") != 0) {
|
|
unlink(path_z); unlink(path_b); unlink(path_a); unlink(path_deep);
|
|
rmdir(deeper); rmdir(subdir); rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
// Hash the directory
|
|
fh_context_t* ctx = fh_init(0);
|
|
if (!ctx) {
|
|
unlink(path_z); unlink(path_b); unlink(path_a); unlink(path_deep);
|
|
rmdir(deeper); rmdir(subdir); rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
char* hash1 = fh_hash_directory(ctx, base_dir);
|
|
if (!hash1 || strlen(hash1) != 64) {
|
|
fh_cleanup(ctx);
|
|
unlink(path_z); unlink(path_b); unlink(path_a); unlink(path_deep);
|
|
rmdir(deeper); rmdir(subdir); rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
// Hash again - should produce identical result (deterministic)
|
|
char* hash2 = fh_hash_directory(ctx, base_dir);
|
|
if (!hash2 || strcmp(hash1, hash2) != 0) {
|
|
fh_free_string(hash1);
|
|
fh_cleanup(ctx);
|
|
unlink(path_z); unlink(path_b); unlink(path_a); unlink(path_deep);
|
|
rmdir(deeper); rmdir(subdir); rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
// Cleanup
|
|
fh_free_string(hash1);
|
|
fh_free_string(hash2);
|
|
fh_cleanup(ctx);
|
|
|
|
// Remove test files
|
|
unlink(path_deep);
|
|
unlink(path_a);
|
|
unlink(path_b);
|
|
unlink(path_z);
|
|
rmdir(deeper);
|
|
rmdir(subdir);
|
|
rmdir(base_dir);
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Test: Empty nested directories
|
|
static int test_empty_nested_dirs() {
|
|
std::string cwd = get_cwd();
|
|
char base_dir[4096];
|
|
snprintf(base_dir, sizeof(base_dir), "%s/test_empty_XXXXXX", cwd.c_str());
|
|
|
|
if (mkdtemp(base_dir) == nullptr) return -1;
|
|
|
|
char empty_subdir[4096];
|
|
snprintf(empty_subdir, sizeof(empty_subdir), "%s/empty_sub", base_dir);
|
|
if (mkdir(empty_subdir, 0755) != 0) {
|
|
rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
char path[4096];
|
|
snprintf(path, sizeof(path), "%s/only_file.txt", base_dir);
|
|
if (create_file(path, "content") != 0) {
|
|
rmdir(empty_subdir);
|
|
rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
fh_context_t* ctx = fh_init(0);
|
|
if (!ctx) {
|
|
unlink(path);
|
|
rmdir(empty_subdir);
|
|
rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
char* hash = fh_hash_directory(ctx, base_dir);
|
|
if (!hash || strlen(hash) != 64) {
|
|
fh_cleanup(ctx);
|
|
unlink(path);
|
|
rmdir(empty_subdir);
|
|
rmdir(base_dir);
|
|
return -1;
|
|
}
|
|
|
|
fh_free_string(hash);
|
|
fh_cleanup(ctx);
|
|
|
|
unlink(path);
|
|
rmdir(empty_subdir);
|
|
rmdir(base_dir);
|
|
|
|
return 0;
|
|
}
|
|
|
|
int main() {
|
|
printf("Testing recursive dataset hashing...\n");
|
|
if (test_recursive_hashing() != 0) {
|
|
printf("FAILED\n");
|
|
return 1;
|
|
}
|
|
if (test_empty_nested_dirs() != 0) {
|
|
printf("FAILED\n");
|
|
return 1;
|
|
}
|
|
printf("All recursive dataset tests passed.\n");
|
|
return 0;
|
|
}
|