fetch_ml/native/queue_index/index/priority_queue.cpp
Jeremie Fraeys 43d241c28d
feat: implement C++ native libraries for performance-critical operations
- Add arena allocator for zero-allocation hot paths
- Add thread pool for parallel operations
- Add mmap utilities for memory-mapped I/O
- Implement queue_index with heap-based priority queue
- Implement dataset_hash with SIMD support (SHA-NI, ARMv8)
- Add runtime SIMD detection for cross-platform correctness
- Add comprehensive tests and benchmarks
2026-02-16 20:38:04 -05:00

156 lines
4.3 KiB
C++

// priority_queue.cpp - C++ style but using C-style storage
#include "priority_queue.h"
#include <algorithm>
#include <cstring>
// Constructs an index for the queue directory `queue_dir`.
// The heap is bound to entries_ with an EntryComparator; stored entries are
// only loaded later, by open().
PriorityQueueIndex::PriorityQueueIndex(const char* queue_dir)
    : heap_(entries_, EntryComparator{}) {
    // The result of storage_init() is discarded on purpose: it reports false
    // for an invalid path, and in that case open() is expected to fail.
    static_cast<void>(storage_init(&storage_, queue_dir));
}
// Destructor: hands off to close(), which saves pending changes (when the
// index is dirty) and releases the storage and in-memory state.
PriorityQueueIndex::~PriorityQueueIndex() {
    this->close();
}
// Opens the backing storage, loads the stored entries and builds the heap.
//
// Returns true on success. If the storage cannot be opened, a message is
// written to last_error_ and false is returned; no entries are loaded.
bool PriorityQueueIndex::open() {
    if (storage_open(&storage_)) {
        load_entries();
        rebuild_heap();
        return true;
    }

    // strncpy does not terminate on truncation, so terminate explicitly.
    const size_t last = sizeof(last_error_) - 1;
    strncpy(last_error_, "Failed to open storage", last);
    last_error_[last] = '\0';
    return false;
}
// Persists unsaved changes (if any), then closes and cleans up the storage
// and drops all in-memory entries and heap contents.
void PriorityQueueIndex::close() {
    // save() writes through storage_, so it has to run before storage_close().
    // Its result is ignored: closing is best effort.
    if (dirty_) {
        static_cast<void>(save());
    }

    storage_close(&storage_);
    storage_cleanup(&storage_);

    entries_.clear();
    heap_.clear();
}
void PriorityQueueIndex::load_entries() {
entries_.clear();
// Try memory-mapped access first
if (storage_mmap_for_read(&storage_)) {
size_t count = storage_mmap_entry_count(&storage_);
const DiskEntry* disk_entries = storage_mmap_entries(&storage_);
entries_.reserve(count);
for (size_t i = 0; i < count; ++i) {
IndexEntry entry;
memcpy(&entry.task.id, disk_entries[i].id, 64);
entry.task.id[63] = '\0'; // Ensure null termination
memcpy(&entry.task.job_name, disk_entries[i].job_name, 128);
entry.task.job_name[127] = '\0'; // Ensure null termination
entry.task.priority = disk_entries[i].priority;
entry.task.created_at = disk_entries[i].created_at;
entry.task.next_retry = disk_entries[i].next_retry;
entry.offset = i;
entry.dirty = false;
entries_.push_back(entry);
}
}
storage_munmap(&storage_);
}
// Rebuilds heap_ from scratch using the index of every element in entries_.
//
// NOTE(review): no filtering is applied, so entries whose index was already
// popped from the heap are queued again by the next rebuild. Confirm whether
// that is intended.
void PriorityQueueIndex::rebuild_heap() {
    const size_t total = entries_.size();

    std::vector<size_t> queued_indices;
    queued_indices.reserve(total);  // final size is known: avoid regrowth
    for (size_t i = 0; i < total; ++i) {
        queued_indices.push_back(i);
    }

    heap_.build(queued_indices);
}
int PriorityQueueIndex::add_tasks(const qi_task_t* tasks, uint32_t count) {
std::lock_guard<std::mutex> lock(mutex_);
for (uint32_t i = 0; i < count; ++i) {
IndexEntry entry;
entry.task = tasks[i];
entry.offset = 0;
entry.dirty = true;
entries_.push_back(entry);
}
dirty_ = true;
rebuild_heap();
return static_cast<int>(count);
}
// Pops up to `max_count` entries from the heap and copies their tasks into
// `out_tasks`, in pop order. Runs under mutex_.
//
// `out_count` is optional; when non-null it receives the number of tasks
// written (0 on the error path). Heap indices that fall outside entries_ are
// skipped and do not count towards the batch.
//
// Returns 0 on success, or -1 with a message in last_error_ when `out_tasks`
// is null while `max_count` is non-zero.
int PriorityQueueIndex::get_next_batch(qi_task_t* out_tasks, uint32_t max_count, uint32_t* out_count) {
    std::lock_guard<std::mutex> lock(mutex_);

    if (out_count) {
        *out_count = 0;
    }

    // Reject a null output buffer before anything is popped from the heap.
    if (out_tasks == nullptr && max_count > 0) {
        strncpy(last_error_, "Invalid output buffer", sizeof(last_error_) - 1);
        last_error_[sizeof(last_error_) - 1] = '\0';
        return -1;
    }

    uint32_t got = 0;
    while (got < max_count && !heap_.empty()) {
        const size_t idx = heap_.pop();
        if (idx >= entries_.size()) {
            continue;  // stale index: nothing to copy
        }
        out_tasks[got] = entries_[idx].task;
        ++got;
    }

    if (out_count) {
        *out_count = got;
    }
    return 0;
}
// Writes every in-memory entry to storage in on-disk format. Runs under mutex_.
//
// Returns 0 and clears dirty_ on success. On failure, returns -1, stores a
// message in last_error_ and leaves dirty_ unchanged.
int PriorityQueueIndex::save() {
    std::lock_guard<std::mutex> lock(mutex_);

    // Convert entries to disk format
    std::vector<DiskEntry> disk_entries;
    disk_entries.reserve(entries_.size());

    for (const auto& entry : entries_) {
        DiskEntry disk;
        // Zero the whole record first, so that the reserved area and any
        // padding or field not assigned below are written as zero bytes
        // rather than as indeterminate stack contents.
        // NOTE(review): assumes DiskEntry is a trivially copyable C-style struct; confirm.
        memset(&disk, 0, sizeof(disk));

        memcpy(disk.id, entry.task.id, 64);
        memcpy(disk.job_name, entry.task.job_name, 128);
        disk.priority = entry.task.priority;
        disk.created_at = entry.task.created_at;
        disk.next_retry = entry.task.next_retry;

        disk_entries.push_back(disk);
    }

    if (!storage_write_entries(&storage_, disk_entries.data(), disk_entries.size())) {
        strncpy(last_error_, "Failed to write entries", sizeof(last_error_) - 1);
        last_error_[sizeof(last_error_) - 1] = '\0';
        return -1;
    }

    dirty_ = false;
    return 0;
}
// Copies the task of every entry into a newly allocated array. Runs under mutex_.
//
// On success returns 0 with *out_tasks pointing at an array of *out_count
// tasks. When the index is empty, *out_tasks is nullptr and *out_count is 0.
// The array is allocated with new[], so it must be released with a matching
// delete[].
// NOTE(review): presumably the public API exposes a free function for this; confirm.
//
// Returns -1 with a message in last_error_ when either output pointer is null.
int PriorityQueueIndex::get_all_tasks(qi_task_t** out_tasks, size_t* out_count) {
    std::lock_guard<std::mutex> lock(mutex_);

    // Both outputs are written unconditionally below, so reject null here.
    if (out_tasks == nullptr || out_count == nullptr) {
        strncpy(last_error_, "Invalid output arguments", sizeof(last_error_) - 1);
        last_error_[sizeof(last_error_) - 1] = '\0';
        return -1;
    }

    const size_t total = entries_.size();
    if (total == 0) {
        *out_tasks = nullptr;
        *out_count = 0;
        return 0;
    }

    qi_task_t* tasks = new qi_task_t[total];
    for (size_t i = 0; i < total; ++i) {
        tasks[i] = entries_[i].task;
    }

    *out_tasks = tasks;
    *out_count = total;
    return 0;
}