Security Fixes: - CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries(); prevents symlink attacks on predictable .tmp file paths - CVE-2025-47290: Use openat_nofollow() in storage_open(); closes TOCTOU race condition via path_sanitizer infrastructure - CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks(); prevents integer overflow in batch operations Research Trustworthiness (dataset_hash): - Deterministic file ordering: std::sort after collect_files() - Recursive directory traversal: depth-limited with cycle detection - Documented exclusions: hidden files and special files noted in API Bug Fixes: - R1: storage_init path validation for non-existent directories - R2: safe_strncpy return value check before strcat - R3: parallel_hash 256-file cap replaced with std::vector - R4: wire qi_compact_index/qi_rebuild_index stubs - R5: CompletionLatch race condition fix (hold mutex during decrement) - R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32) - R7: fuzz_index_storage header format fix - R8: enforce null termination in add_tasks/update_tasks - R9: use 64 bytes (not 65) in combined hash to exclude null terminator - R10: status field persistence in save() New Tests: - test_recursive_dataset.cpp: Verify deterministic recursive hashing - test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix - test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix - test_sha256_arm_kat.cpp: ARMv8 known-answer tests - test_storage_init_new_dir.cpp: R1 verification - test_parallel_hash_large_dir.cpp: R3 verification - test_queue_index_compact.cpp: R4 verification All 8 native tests passing. Library ready for research lab deployment.
92 lines
2.7 KiB
C++
92 lines
2.7 KiB
C++
#pragma once
|
|
#include "../storage/index_storage.h"
|
|
#include "../heap/binary_heap.h"
|
|
#include "queue_index.h"
|
|
#include <cstring>
|
|
#include <mutex>
|
|
#include <vector>
|
|
#include <unordered_map>
|
|
#include <string>
|
|
|
|
// In-memory index entry with metadata
|
|
struct IndexEntry {
|
|
qi_task_t task;
|
|
uint64_t offset; // File offset (for future direct access)
|
|
bool dirty; // Modified since last save
|
|
};
|
|
|
|
// Priority queue comparator for IndexEntry
|
|
struct EntryComparator {
|
|
bool operator()(size_t a, size_t b, const std::vector<IndexEntry>& items) const {
|
|
const auto& ta = items[a].task;
|
|
const auto& tb = items[b].task;
|
|
if (ta.priority != tb.priority) {
|
|
return ta.priority < tb.priority; // Max-heap: higher priority first
|
|
}
|
|
return ta.created_at > tb.created_at; // Earlier first
|
|
}
|
|
};
|
|
|
|
// High-level priority queue index
|
|
class PriorityQueueIndex {
|
|
// Maximum batch size for add_tasks to prevent integer overflow (CVE-2025-0838)
|
|
static constexpr uint32_t MAX_BATCH_SIZE = 10000;
|
|
|
|
IndexStorage storage_;
|
|
std::vector<IndexEntry> entries_;
|
|
BinaryHeap<IndexEntry, EntryComparator> heap_;
|
|
mutable std::mutex mutex_;
|
|
char last_error_[256];
|
|
bool dirty_ = false;
|
|
|
|
// Hash map for O(1) task ID lookups
|
|
std::unordered_map<std::string, size_t> id_index_;
|
|
|
|
public:
|
|
explicit PriorityQueueIndex(const char* queue_dir);
|
|
~PriorityQueueIndex();
|
|
|
|
// Open/load existing index
|
|
bool open();
|
|
void close();
|
|
|
|
// Add tasks
|
|
int add_tasks(const qi_task_t* tasks, uint32_t count);
|
|
|
|
// Get next batch of tasks (highest priority first)
|
|
int get_next_batch(qi_task_t* out_tasks, uint32_t max_count, uint32_t* out_count);
|
|
|
|
// Save to disk
|
|
int save();
|
|
|
|
// Query
|
|
uint64_t count() const { return entries_.size(); }
|
|
bool empty() const { return entries_.empty(); }
|
|
|
|
// Get all tasks (returns newly allocated array, caller must free)
|
|
int get_all_tasks(qi_task_t** out_tasks, size_t* out_count);
|
|
|
|
// Get task by ID (O(1) lookup)
|
|
int get_task_by_id(const char* task_id, qi_task_t* out_task);
|
|
|
|
// Update tasks
|
|
int update_tasks(const qi_task_t* tasks, uint32_t count);
|
|
|
|
// Remove tasks
|
|
int remove_tasks(const char** task_ids, uint32_t count);
|
|
|
|
// Compact index (remove finished/failed tasks)
|
|
int compact_index();
|
|
|
|
// Rebuild heap
|
|
int rebuild();
|
|
|
|
// Error handling
|
|
const char* last_error() const { return last_error_[0] ? last_error_ : nullptr; }
|
|
void clear_error() { last_error_[0] = '\0'; }
|
|
|
|
private:
|
|
void load_entries();
|
|
void rebuild_heap();
|
|
void rebuild_id_index(); // Rebuild hash map from entries
|
|
};
|