fetch_ml/native/tests/test_queue_index_compact.cpp
Jeremie Fraeys 7efe8bbfbf
native: security hardening, research trustworthiness, and CVE mitigations
Security Fixes:
- CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries()
  Prevents symlink attacks on predictable .tmp file paths
- CVE-2025-47290: Use openat_nofollow() in storage_open()
  Closes TOCTOU race condition via path_sanitizer infrastructure
- CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks()
  Prevents integer overflow in batch operations

Research Trustworthiness (dataset_hash):
- Deterministic file ordering: std::sort after collect_files()
- Recursive directory traversal: depth-limited with cycle detection
- Documented exclusions: hidden files and special files noted in API

Bug Fixes:
- R1: storage_init path validation for non-existent directories
- R2: safe_strncpy return value check before strcat
- R3: parallel_hash 256-file cap replaced with std::vector
- R4: wire qi_compact_index/qi_rebuild_index stubs
- R5: CompletionLatch race condition fix (hold mutex during decrement)
- R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32)
- R7: fuzz_index_storage header format fix
- R8: enforce null termination in add_tasks/update_tasks
- R9: use 64 bytes (not 65) in combined hash to exclude null terminator
- R10: status field persistence in save()

New Tests:
- test_recursive_dataset.cpp: Verify deterministic recursive hashing
- test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix
- test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix
- test_sha256_arm_kat.cpp: ARMv8 known-answer tests
- test_storage_init_new_dir.cpp: F1 verification
- test_parallel_hash_large_dir.cpp: F3 verification
- test_queue_index_compact.cpp: F4 verification

All 8 native tests passing. Library ready for research lab deployment.
2026-02-21 13:33:45 -05:00

127 lines
4.1 KiB
C++

// test_queue_index_compact.cpp - Verify qi_compact_index removes finished/failed tasks
// Validates F4 fix: compact_index actually removes tasks with finished/failed status
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <sys/stat.h>
#include "../queue_index/queue_index.h"
int main() {
// Create a temporary directory for the queue
char tmpdir[] = "/tmp/test_compact_XXXXXX";
if (!mkdtemp(tmpdir)) {
printf("FAIL: Could not create temp directory\n");
return 1;
}
// Open queue index
qi_index_t* idx = qi_open(tmpdir);
if (!idx) {
printf("FAIL: Could not open queue index\n");
return 1;
}
// Add tasks with different statuses
qi_task_t tasks[5];
memset(tasks, 0, sizeof(tasks));
// Task 0: queued
strncpy(tasks[0].id, "task_000", sizeof(tasks[0].id));
strncpy(tasks[0].job_name, "job0", sizeof(tasks[0].job_name));
strncpy(tasks[0].status, "queued", sizeof(tasks[0].status));
tasks[0].priority = 100;
// Task 1: finished (should be removed)
strncpy(tasks[1].id, "task_001", sizeof(tasks[1].id));
strncpy(tasks[1].job_name, "job1", sizeof(tasks[1].job_name));
strncpy(tasks[1].status, "finished", sizeof(tasks[1].status));
tasks[1].priority = 100;
// Task 2: failed (should be removed)
strncpy(tasks[2].id, "task_002", sizeof(tasks[2].id));
strncpy(tasks[2].job_name, "job2", sizeof(tasks[2].job_name));
strncpy(tasks[2].status, "failed", sizeof(tasks[2].status));
tasks[2].priority = 100;
// Task 3: running
strncpy(tasks[3].id, "task_003", sizeof(tasks[3].id));
strncpy(tasks[3].job_name, "job3", sizeof(tasks[3].job_name));
strncpy(tasks[3].status, "running", sizeof(tasks[3].status));
tasks[3].priority = 100;
// Task 4: queued
strncpy(tasks[4].id, "task_004", sizeof(tasks[4].id));
strncpy(tasks[4].job_name, "job4", sizeof(tasks[4].job_name));
strncpy(tasks[4].status, "queued", sizeof(tasks[4].status));
tasks[4].priority = 100;
int added = qi_add_tasks(idx, tasks, 5);
if (added != 5) {
printf("FAIL: Could not add tasks (added %d)\n", added);
qi_close(idx);
return 1;
}
printf("Added 5 tasks\n");
// Compact the index (no explicit save needed - happens on close)
int removed = qi_compact_index(idx);
if (removed != 2) {
printf("FAIL: Expected 2 tasks removed, got %d\n", removed);
qi_close(idx);
return 1;
}
// Close and reopen to verify persistence
qi_close(idx);
idx = qi_open(tmpdir);
if (!idx) {
printf("FAIL: Could not reopen queue index\n");
return 1;
}
// Get all remaining tasks
qi_task_t* remaining_tasks = nullptr;
size_t remaining_count = 0;
if (qi_get_all_tasks(idx, &remaining_tasks, &remaining_count) != 0) {
printf("FAIL: Could not get all tasks\n");
qi_close(idx);
return 1;
}
if (remaining_count != 3) {
printf("FAIL: Expected 3 remaining tasks, got %zu\n", remaining_count);
qi_free_task_array(remaining_tasks);
qi_close(idx);
return 1;
}
// Verify all remaining tasks are not finished/failed
for (size_t i = 0; i < remaining_count; i++) {
if (strcmp(remaining_tasks[i].status, "finished") == 0 ||
strcmp(remaining_tasks[i].status, "failed") == 0) {
printf("FAIL: Task %zu has status '%s' after compact\n", i, remaining_tasks[i].status);
qi_free_task_array(remaining_tasks);
qi_close(idx);
return 1;
}
}
printf("Remaining %zu tasks have correct statuses\n", remaining_count);
// Cleanup
qi_free_task_array(remaining_tasks);
qi_close(idx);
// Remove index file and directory
char index_path[256];
snprintf(index_path, sizeof(index_path), "%s/index.bin", tmpdir);
unlink(index_path);
rmdir(tmpdir);
printf("PASS: qi_compact_index removes finished/failed tasks (F4 fix verified)\n");
return 0;
}