fetch_ml/native/tests/test_queue_index_batch_limit.cpp
Jeremie Fraeys 7efe8bbfbf
native: security hardening, research trustworthiness, and CVE mitigations
Security Fixes:
- CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries()
  Prevents symlink attacks on predictable .tmp file paths
- CVE-2025-47290: Use openat_nofollow() in storage_open()
  Closes TOCTOU race condition via path_sanitizer infrastructure
- CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks()
  Prevents integer overflow in batch operations

Research Trustworthiness (dataset_hash):
- Deterministic file ordering: std::sort after collect_files()
- Recursive directory traversal: depth-limited with cycle detection
- Documented exclusions: hidden files and special files noted in API

Bug Fixes:
- R1: storage_init path validation for non-existent directories
- R2: safe_strncpy return value check before strcat
- R3: parallel_hash 256-file cap replaced with std::vector
- R4: wire qi_compact_index/qi_rebuild_index stubs
- R5: CompletionLatch race condition fix (hold mutex during decrement)
- R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32)
- R7: fuzz_index_storage header format fix
- R8: enforce null termination in add_tasks/update_tasks
- R9: use 64 bytes (not 65) in combined hash to exclude null terminator
- R10: status field persistence in save()

New Tests:
- test_recursive_dataset.cpp: Verify deterministic recursive hashing
- test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix
- test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix
- test_sha256_arm_kat.cpp: ARMv8 known-answer tests
- test_storage_init_new_dir.cpp: R1 verification
- test_parallel_hash_large_dir.cpp: R3 verification
- test_queue_index_compact.cpp: R4 verification

All 8 native tests passing. Library ready for research lab deployment.
2026-02-21 13:33:45 -05:00

181 lines
5.5 KiB
C++

#include <limits.h>
#include <sys/stat.h>
#include <unistd.h>

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>

#include "../native/queue_index/index/priority_queue.h"
// Returns the absolute path of the process's current working directory.
// Yields an empty string when getcwd() reports failure.
static std::string get_cwd() {
    char path[PATH_MAX];
    const char* resolved = getcwd(path, sizeof(path));
    return resolved == nullptr ? std::string("") : std::string(resolved);
}
// Test: Verify MAX_BATCH_SIZE enforcement (CVE-2025-0838)
static int test_batch_size_limit() {
printf(" Testing MAX_BATCH_SIZE enforcement (CVE-2025-0838)...\n");
std::string cwd = get_cwd();
char base_dir[4096];
snprintf(base_dir, sizeof(base_dir), "%s/test_batch_XXXXXX", cwd.c_str());
if (mkdtemp(base_dir) == nullptr) {
printf(" ERROR: mkdtemp failed\n");
return -1;
}
PriorityQueueIndex index(base_dir);
if (!index.open()) {
printf(" ERROR: failed to open index\n");
rmdir(base_dir);
return -1;
}
// Create a batch that exceeds MAX_BATCH_SIZE (10000)
const uint32_t oversized_batch = 10001;
qi_task_t* tasks = new qi_task_t[oversized_batch];
memset(tasks, 0, sizeof(qi_task_t) * oversized_batch);
for (uint32_t i = 0; i < oversized_batch; i++) {
snprintf(tasks[i].id, sizeof(tasks[i].id), "task_%u", i);
snprintf(tasks[i].job_name, sizeof(tasks[i].job_name), "job_%u", i);
snprintf(tasks[i].status, sizeof(tasks[i].status), "pending");
tasks[i].priority = static_cast<int64_t>(i);
tasks[i].created_at = 0;
tasks[i].next_retry = 0;
}
// Attempt to add oversized batch - should fail
int result = index.add_tasks(tasks, oversized_batch);
if (result != -1) {
printf(" ERROR: add_tasks should have rejected oversized batch\n");
delete[] tasks;
index.close();
rmdir(base_dir);
return -1;
}
// Verify error message was set
const char* error = index.last_error();
if (!error || strstr(error, "Batch size") == nullptr) {
printf(" ERROR: expected error message about batch size\n");
delete[] tasks;
index.close();
rmdir(base_dir);
return -1;
}
printf(" Oversized batch correctly rejected\n");
// Now try a batch at exactly MAX_BATCH_SIZE - should succeed
const uint32_t max_batch = 10000;
qi_task_t* valid_tasks = new qi_task_t[max_batch];
memset(valid_tasks, 0, sizeof(qi_task_t) * max_batch);
for (uint32_t i = 0; i < max_batch; i++) {
snprintf(valid_tasks[i].id, sizeof(valid_tasks[i].id), "valid_%u", i);
snprintf(valid_tasks[i].job_name, sizeof(valid_tasks[i].job_name), "job_%u", i);
snprintf(valid_tasks[i].status, sizeof(valid_tasks[i].status), "pending");
valid_tasks[i].priority = static_cast<int64_t>(i);
valid_tasks[i].created_at = 0;
valid_tasks[i].next_retry = 0;
}
// Clear previous error
index.clear_error();
result = index.add_tasks(valid_tasks, max_batch);
if (result != static_cast<int>(max_batch)) {
printf(" ERROR: add_tasks should have accepted max-sized batch\n");
delete[] tasks;
delete[] valid_tasks;
index.close();
rmdir(base_dir);
return -1;
}
printf(" Max-sized batch correctly accepted\n");
// Clean up
delete[] tasks;
delete[] valid_tasks;
index.close();
rmdir(base_dir);
printf(" MAX_BATCH_SIZE enforcement: PASSED\n");
return 0;
}
// Test: Verify small batches still work normally
static int test_small_batch() {
printf(" Testing small batch handling...\n");
std::string cwd = get_cwd();
char base_dir[4096];
snprintf(base_dir, sizeof(base_dir), "%s/test_small_XXXXXX", cwd.c_str());
if (mkdtemp(base_dir) == nullptr) {
printf(" ERROR: mkdtemp failed\n");
return -1;
}
PriorityQueueIndex index(base_dir);
if (!index.open()) {
printf(" ERROR: failed to open index\n");
rmdir(base_dir);
return -1;
}
// Add a small batch
const uint32_t small_count = 5;
qi_task_t tasks[small_count];
memset(tasks, 0, sizeof(tasks));
for (uint32_t i = 0; i < small_count; i++) {
snprintf(tasks[i].id, sizeof(tasks[i].id), "small_%u", i);
snprintf(tasks[i].job_name, sizeof(tasks[i].job_name), "job_%u", i);
snprintf(tasks[i].status, sizeof(tasks[i].status), "pending");
tasks[i].priority = static_cast<int64_t>(i);
tasks[i].created_at = 0;
tasks[i].next_retry = 0;
}
int result = index.add_tasks(tasks, small_count);
if (result != static_cast<int>(small_count)) {
printf(" ERROR: small batch should have been accepted\n");
index.close();
rmdir(base_dir);
return -1;
}
// Verify count
if (index.count() != small_count) {
printf(" ERROR: count mismatch after adding tasks\n");
index.close();
rmdir(base_dir);
return -1;
}
printf(" Small batch handled correctly\n");
// Clean up
index.close();
rmdir(base_dir);
printf(" Small batch handling: PASSED\n");
return 0;
}
// Entry point: runs the batch-limit test and then the small-batch test,
// stopping at the first failure. Exit status is 0 when both pass, 1 otherwise.
int main() {
    printf("Testing queue index batch limit (CVE-2025-0838)...\n");

    // && short-circuits, so the second test is skipped once the first fails.
    const bool all_passed =
        test_batch_size_limit() == 0 && test_small_batch() == 0;
    if (!all_passed) {
        return 1;
    }

    printf("All batch limit tests passed.\n");
    return 0;
}