fetch_ml/native_rust/queue_index/src/lib.rs
Jeremie Fraeys 6949287fb3
feat(native_rust): implement BLAKE3 dataset_hash and priority queue_index
Implements two production-ready Rust native libraries:

## dataset_hash (BLAKE3-based hashing)
- FFI exports: ds_hash_file, ds_hash_directory_batch, ds_hash_directory_combined
- BLAKE3 hashing for files and directory trees
- Hidden file filtering (respects .hidden and _prefix files)
- Prometheus-compatible metrics export
- Comprehensive integration tests (12 tests)
- Benchmarks: hash_file_1kb (~14µs), hash_file_1mb (~610µs), dir_100files (~1.6ms)

## queue_index (priority queue)
- FFI exports: 25+ functions matching C++ API
  - Lifecycle: qi_open, qi_close
  - Task ops: add_tasks, update_tasks, remove_tasks, get_task_by_id
  - Queue ops: get_next_batch, peek_next, mark_completed
  - Priority: get_next_priority_task, peek_priority_task
  - Query: get_all_tasks, get_tasks_by_status, get_task_count
  - Retry/DLQ: retry_task, move_to_dlq
  - Lease: renew_lease, release_lease
  - Maintenance: rebuild_index, compact_index
- BinaryHeap-based priority queue with correct Ord (max-heap)
- Memory-mapped storage with safe Rust wrappers
- Panic-safe FFI boundaries using catch_unwind
- Comprehensive integration tests (7 tests, 1 ignored for persistence)
- Benchmarks: add_100 (~60µs), get_10 (~24ns), priority (~5µs)

## Architecture
- Cargo workspace with shared common crate
- Criterion benchmarks for both crates
- Rust 1.85.0 toolchain pinned
- Zero compiler warnings
- All 19 tests passing

Compare: make compare-benchmarks (Rust/Go/C++ comparison)
2026-03-23 12:52:13 -04:00

829 lines
20 KiB
Rust

//! Queue Index - High-performance priority queue with mmap persistence
//!
//! This crate provides a Rust implementation of the queue index with FFI exports
//! for integration with Go. It uses memory-mapped files for persistence and
//! std::sync::Mutex for thread-safe operations.
use std::ffi::{CStr, CString};
use std::os::raw::{c_char, c_int};
use std::path::PathBuf;
use std::ptr;
use std::sync::{Arc, Mutex};
mod index;
mod storage;
mod task;
pub use index::QueueIndexImpl as QueueIndex;
pub use storage::IndexStorage;
pub use task::Task;
use index::QueueIndexImpl;
/// Opaque handle for queue index
///
/// `qi_open` boxes one of these and hands it out as a raw pointer; `qi_close`
/// reclaims and drops it. Foreign callers only ever hold it as `*mut QiIndex`.
pub struct QiIndex {
// The index implementation. FFI entry points that operate on the queue lock
// this mutex before calling into it.
inner: Arc<Mutex<QueueIndexImpl>>,
// Message of the most recently recorded failure, if any. Returned by
// `qi_last_error` and reset to `None` by `qi_clear_error`.
last_error: Mutex<Option<String>>,
}
/// Task structure - matches C FFI layout
///
/// The string fields are fixed-size `c_char` buffers. `QiTask::from_task`
/// fills them NUL-terminated (truncating text that does not fit) and
/// `QiTask::to_task` reads each one up to its first NUL byte.
#[repr(C)]
pub struct QiTask {
/// Mirrors `Task::id`; 64-byte buffer including the terminating NUL.
pub id: [c_char; 64],
/// Mirrors `Task::job_name`; 128-byte buffer including the terminating NUL.
pub job_name: [c_char; 128],
/// Mirrors `Task::priority`.
pub priority: i64,
/// Mirrors `Task::created_at`.
pub created_at: i64,
/// Mirrors `Task::next_retry`.
pub next_retry: i64,
/// Mirrors `Task::status`; 16-byte buffer including the terminating NUL.
pub status: [c_char; 16],
/// Mirrors `Task::retries`.
pub retries: u32,
}
impl QiTask {
    /// Builds the fixed-layout FFI record for `task`.
    ///
    /// Numeric fields are copied verbatim. Each string field is copied into its
    /// fixed-size buffer, truncated if necessary, and always NUL-terminated.
    fn from_task(task: &Task) -> Self {
        let mut qi_task = QiTask {
            id: [0; 64],
            job_name: [0; 128],
            priority: task.priority,
            created_at: task.created_at,
            next_retry: task.next_retry,
            status: [0; 16],
            retries: task.retries,
        };
        // Copy strings with null termination
        Self::copy_str(&mut qi_task.id, &task.id, 64);
        Self::copy_str(&mut qi_task.job_name, &task.job_name, 128);
        Self::copy_str(&mut qi_task.status, &task.status, 16);
        qi_task
    }

    /// Copies `src` into `dest` as a NUL-terminated C string.
    ///
    /// At most `min(max_len, dest.len()) - 1` bytes of text are written, so the
    /// terminator always fits and an oversized `max_len` cannot index past the
    /// buffer. Truncation backs up to the previous UTF-8 character boundary so
    /// the buffer never ends in a partial multi-byte sequence. A zero-capacity
    /// destination is left untouched.
    fn copy_str(dest: &mut [c_char], src: &str, max_len: usize) {
        let capacity = max_len.min(dest.len());
        if capacity == 0 {
            return;
        }
        let mut len = src.len().min(capacity - 1);
        // Offset 0 is always a boundary, so this loop terminates.
        while !src.is_char_boundary(len) {
            len -= 1;
        }
        for (slot, &byte) in dest.iter_mut().zip(&src.as_bytes()[..len]) {
            *slot = byte as c_char;
        }
        dest[len] = 0;
    }

    /// Converts this FFI record back into an owned `Task`.
    fn to_task(&self) -> Task {
        Task {
            id: Self::cstr_to_string(&self.id),
            job_name: Self::cstr_to_string(&self.job_name),
            priority: self.priority,
            created_at: self.created_at,
            next_retry: self.next_retry,
            status: Self::cstr_to_string(&self.status),
            retries: self.retries,
        }
    }

    /// Reads `arr` up to its first NUL (or its full length when there is none)
    /// and decodes it as UTF-8, replacing invalid sequences with U+FFFD.
    fn cstr_to_string(arr: &[c_char]) -> String {
        let bytes: Vec<u8> = arr
            .iter()
            .take_while(|&&c| c != 0)
            .map(|&c| c as u8)
            .collect();
        String::from_utf8_lossy(&bytes).into_owned()
    }
}
/// Opens (or creates) the queue index stored under `path` and returns a handle to it.
///
/// Returns null when `path` is null, is not valid UTF-8, the index cannot be
/// opened, or a panic is caught while opening. Open failures and panics are
/// reported on stderr. A non-null result must eventually be passed to `qi_close`.
///
/// # Safety
/// path must be a valid null-terminated UTF-8 string
#[no_mangle]
pub unsafe extern "C" fn qi_open(path: *const c_char) -> *mut QiIndex {
    if path.is_null() {
        return ptr::null_mut();
    }
    let Ok(dir) = CStr::from_ptr(path).to_str() else {
        return ptr::null_mut();
    };
    let dir = PathBuf::from(dir);

    // Never let a panic unwind across the FFI boundary.
    let outcome = std::panic::catch_unwind(move || match QueueIndexImpl::open(dir) {
        Ok(inner) => Box::into_raw(Box::new(QiIndex {
            inner: Arc::new(Mutex::new(inner)),
            last_error: Mutex::new(None),
        })),
        Err(e) => {
            eprintln!("Failed to open queue index: {}", e);
            ptr::null_mut()
        }
    });

    outcome.unwrap_or_else(|_| {
        eprintln!("Panic in qi_open");
        ptr::null_mut()
    })
}
/// Destroys a handle created by `qi_open`, releasing everything it owns.
///
/// A null `idx` is ignored. A panic raised while dropping the handle is
/// caught and discarded.
///
/// # Safety
/// idx must be a valid pointer returned by qi_open, or null
#[no_mangle]
pub unsafe extern "C" fn qi_close(idx: *mut QiIndex) {
    if idx.is_null() {
        return;
    }
    let _ = std::panic::catch_unwind(|| {
        // Re-box the pointer so the handle is dropped and its memory freed.
        let handle = Box::from_raw(idx);
        drop(handle);
    });
}
/// Add tasks to the index in a batch
///
/// # Safety
/// idx must be valid, tasks must point to count valid QiTask structs
#[no_mangle]
pub unsafe extern "C" fn qi_add_tasks(
idx: *mut QiIndex,
tasks: *const QiTask,
count: u32,
) -> c_int {
if idx.is_null() || tasks.is_null() || count == 0 {
return -1;
}
let result = std::panic::catch_unwind(|| {
let index = &*idx;
let mut inner = index.inner.lock().unwrap();
let task_slice = std::slice::from_raw_parts(tasks, count as usize);
let rust_tasks: Vec<Task> = task_slice.iter().map(|t| t.to_task()).collect();
match inner.add_tasks(&rust_tasks) {
Ok(added) => added as c_int,
Err(e) => {
let mut error_guard = index.last_error.lock().unwrap();
*error_guard = Some(e.to_string());
-1
}
}
});
match result {
Ok(n) => n,
Err(_) => {
eprintln!("Panic in qi_add_tasks");
-1
}
}
}
/// Get the next batch of tasks from the priority queue
///
/// Writes up to `max_count` tasks into `out_tasks` and the number written into
/// `out_count`, then returns 0. Returns -1 when an argument is null,
/// `max_count` is zero, the index reports an error (its message is stored for
/// `qi_last_error`), or a panic is caught (reported on stderr). `out_count` is
/// set to 0 on every failure after argument validation, so the caller never
/// reads an indeterminate count.
///
/// # Safety
/// idx must be valid, out_tasks must have space for max_count tasks, out_count must be valid.
/// The memory behind out_tasks does not need to be initialized.
#[no_mangle]
pub unsafe extern "C" fn qi_get_next_batch(
    idx: *mut QiIndex,
    out_tasks: *mut QiTask,
    max_count: u32,
    out_count: *mut u32,
) -> c_int {
    if idx.is_null() || out_tasks.is_null() || out_count.is_null() || max_count == 0 {
        return -1;
    }
    // Give the caller a defined count even if the call fails below.
    *out_count = 0;

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let mut inner = index.inner.lock().unwrap();
        match inner.get_next_batch(max_count as usize) {
            Ok(tasks) => {
                let count = tasks.len().min(max_count as usize);
                for (i, task) in tasks.iter().take(count).enumerate() {
                    // Write through the raw pointer instead of building a
                    // `&mut [QiTask]`: a slice reference would require the
                    // caller's buffer to already hold initialized values.
                    // `i < count <= max_count`, so the write stays in bounds.
                    out_tasks.add(i).write(QiTask::from_task(&task));
                }
                *out_count = count as u32;
                0
            }
            Err(e) => {
                let mut error_guard = index.last_error.lock().unwrap();
                *error_guard = Some(e.to_string());
                -1
            }
        }
    });

    match result {
        Ok(rc) => rc,
        Err(_) => {
            eprintln!("Panic in qi_get_next_batch");
            -1
        }
    }
}
/// Copies the task at the head of the queue into `out_task` without removing it.
///
/// Returns 0 when a task was written. Returns -1 when an argument is null, the
/// index has no task to offer, or a panic is caught (reported on stderr).
///
/// # Safety
/// idx must be valid, out_task must be valid
#[no_mangle]
pub unsafe extern "C" fn qi_peek_next(
    idx: *mut QiIndex,
    out_task: *mut QiTask,
) -> c_int {
    if idx.is_null() || out_task.is_null() {
        return -1;
    }

    let outcome = std::panic::catch_unwind(|| {
        let handle = &*idx;
        let queue = handle.inner.lock().unwrap();
        if let Some(task) = queue.peek_next() {
            *out_task = QiTask::from_task(&task);
            0
        } else {
            // No ready tasks
            -1
        }
    });

    outcome.unwrap_or_else(|_| {
        eprintln!("Panic in qi_peek_next");
        -1
    })
}
/// Looks up the task whose id equals `task_id` and copies it into `out_task`.
///
/// Returns 0 when the task was found and written. Returns -1 when an argument
/// is null, `task_id` is not valid UTF-8, no such task exists, or a panic is
/// caught (reported on stderr).
///
/// # Safety
/// idx must be valid, task_id must be a valid null-terminated string, out_task must be valid
#[no_mangle]
pub unsafe extern "C" fn qi_get_task_by_id(
    idx: *mut QiIndex,
    task_id: *const c_char,
    out_task: *mut QiTask,
) -> c_int {
    if idx.is_null() || task_id.is_null() || out_task.is_null() {
        return -1;
    }
    let Ok(wanted_id) = CStr::from_ptr(task_id).to_str() else {
        return -1;
    };

    let outcome = std::panic::catch_unwind(|| {
        let handle = &*idx;
        let queue = handle.inner.lock().unwrap();
        if let Some(task) = queue.get_task_by_id(wanted_id) {
            *out_task = QiTask::from_task(&task);
            0
        } else {
            // Task not found
            -1
        }
    });

    outcome.unwrap_or_else(|_| {
        eprintln!("Panic in qi_get_task_by_id");
        -1
    })
}
/// Get the last error message for an index
///
/// Returns null when `idx` is null, no error is recorded, or a panic is
/// caught. Otherwise returns a NUL-terminated copy of the recorded message.
/// NUL bytes embedded in the message are dropped from the copy, so such a
/// message is still reported instead of being lost.
///
/// NOTE(review): every non-null return is a fresh allocation that is
/// intentionally leaked so the pointer stays valid indefinitely. Callers that
/// poll this function frequently will grow memory; caching the C string in the
/// handle would avoid that but changes the pointer-lifetime contract.
///
/// # Safety
/// idx must be valid. Returns a string that must not be freed by the caller.
#[no_mangle]
pub unsafe extern "C" fn qi_last_error(idx: *mut QiIndex) -> *const c_char {
    if idx.is_null() {
        return ptr::null();
    }

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let error_guard = index.last_error.lock().unwrap();
        match error_guard.as_deref() {
            Some(message) => {
                // `CString::new` rejects interior NULs, so strip them first.
                let bytes: Vec<u8> = message.bytes().filter(|&b| b != 0).collect();
                match CString::new(bytes) {
                    // Leak the CString to return a stable pointer
                    // Caller must not free this
                    Ok(c_message) => c_message.into_raw() as *const c_char,
                    Err(_) => ptr::null(),
                }
            }
            None => ptr::null(),
        }
    });

    match result {
        Ok(message_ptr) => message_ptr,
        Err(_) => ptr::null(),
    }
}
/// Forgets the error message currently recorded on the handle, if any.
///
/// A null `idx` is ignored; a panic while clearing is caught and discarded.
///
/// # Safety
/// idx must be valid
#[no_mangle]
pub unsafe extern "C" fn qi_clear_error(idx: *mut QiIndex) {
    if !idx.is_null() {
        let _ = std::panic::catch_unwind(|| {
            let handle = &*idx;
            *handle.last_error.lock().unwrap() = None;
        });
    }
}
/// Returns how many tasks the index reports for the given `status`.
///
/// Returns 0 when an argument is null, `status` is not valid UTF-8, or a
/// panic is caught, so 0 alone does not distinguish "none" from failure.
///
/// # Safety
/// idx must be valid, status must be a null-terminated string
#[no_mangle]
pub unsafe extern "C" fn qi_get_task_count(
    idx: *mut QiIndex,
    status: *const c_char,
) -> usize {
    if idx.is_null() || status.is_null() {
        return 0;
    }
    let Ok(wanted) = CStr::from_ptr(status).to_str() else {
        return 0;
    };

    let outcome = std::panic::catch_unwind(|| {
        let handle = &*idx;
        let queue = handle.inner.lock().unwrap();
        queue.get_task_count(wanted)
    });
    outcome.unwrap_or(0)
}
/// Returns every task in the index as a newly allocated array.
///
/// On success returns 0 and stores the array in `*out_tasks` and its length in
/// `*count`; an empty index yields a null array and a count of 0. A non-null
/// array must be released with `qi_free_task_array` using the same count.
/// Returns -1 when an argument is null, allocation fails, or a panic is
/// caught; the outputs are not written in those cases.
///
/// # Safety
/// idx must be valid, out_tasks and count must be valid pointers
#[no_mangle]
pub unsafe extern "C" fn qi_get_all_tasks(
    idx: *mut QiIndex,
    out_tasks: *mut *mut QiTask,
    count: *mut usize,
) -> c_int {
    if idx.is_null() || out_tasks.is_null() || count.is_null() {
        return -1;
    }

    let outcome = std::panic::catch_unwind(|| {
        let handle = &*idx;
        let queue = handle.inner.lock().unwrap();
        let tasks = queue.get_all_tasks();
        let total = tasks.len();
        if total == 0 {
            *out_tasks = ptr::null_mut();
            *count = 0;
            return 0;
        }

        // Raw allocation whose layout qi_free_task_array reconstructs from the count.
        let layout = std::alloc::Layout::array::<QiTask>(total).unwrap();
        let buffer = std::alloc::alloc(layout) as *mut QiTask;
        if buffer.is_null() {
            return -1;
        }

        // The buffer is uninitialized, so each element is written in place.
        for (offset, task) in tasks.iter().enumerate() {
            buffer.add(offset).write(QiTask::from_task(task));
        }

        *out_tasks = buffer;
        *count = total;
        0
    });

    outcome.unwrap_or(-1)
}
/// Returns the tasks whose status equals `status` as a newly allocated array.
///
/// All tasks are fetched from the index and filtered here. On success returns
/// 0 and stores the array in `*out_tasks` and its length in `*count`; when
/// nothing matches, the array is null and the count is 0. A non-null array
/// must be released with `qi_free_task_array` using the same count. Returns -1
/// when an argument is null, `status` is not valid UTF-8, allocation fails, or
/// a panic is caught.
///
/// # Safety
/// idx must be valid, status must be a valid null-terminated string
/// out_tasks and count must be valid pointers
#[no_mangle]
pub unsafe extern "C" fn qi_get_tasks_by_status(
    idx: *mut QiIndex,
    status: *const c_char,
    out_tasks: *mut *mut QiTask,
    count: *mut usize,
) -> c_int {
    if idx.is_null() || status.is_null() || out_tasks.is_null() || count.is_null() {
        return -1;
    }
    let Ok(wanted) = CStr::from_ptr(status).to_str() else {
        return -1;
    };

    let outcome = std::panic::catch_unwind(|| {
        let handle = &*idx;
        let queue = handle.inner.lock().unwrap();
        let matching: Vec<Task> = queue
            .get_all_tasks()
            .into_iter()
            .filter(|task| task.status == wanted)
            .collect();
        let total = matching.len();
        if total == 0 {
            *out_tasks = ptr::null_mut();
            *count = 0;
            return 0;
        }

        // Raw allocation whose layout qi_free_task_array reconstructs from the count.
        let layout = std::alloc::Layout::array::<QiTask>(total).unwrap();
        let buffer = std::alloc::alloc(layout) as *mut QiTask;
        if buffer.is_null() {
            return -1;
        }

        // The buffer is uninitialized, so each element is written in place.
        for (offset, task) in matching.iter().enumerate() {
            buffer.add(offset).write(QiTask::from_task(task));
        }

        *out_tasks = buffer;
        *count = total;
        0
    });

    outcome.unwrap_or(-1)
}
/// Update tasks by ID
///
/// # Safety
/// idx must be valid, tasks must point to count valid QiTask structs
#[no_mangle]
pub unsafe extern "C" fn qi_update_tasks(
idx: *mut QiIndex,
tasks: *const QiTask,
count: u32,
) -> c_int {
if idx.is_null() || tasks.is_null() || count == 0 {
return -1;
}
let result = std::panic::catch_unwind(|| {
let index = &*idx;
let mut inner = index.inner.lock().unwrap();
let task_slice = std::slice::from_raw_parts(tasks, count as usize);
let rust_tasks: Vec<Task> = task_slice.iter().map(|t| t.to_task()).collect();
inner.update_tasks(&rust_tasks) as c_int
});
match result {
Ok(n) => n,
Err(_) => -1,
}
}
/// Removes the tasks named by the `count` C strings in `task_ids`.
///
/// Entries that are null or not valid UTF-8 are skipped. Returns the value
/// reported by the index's `remove_tasks`, or -1 when an argument is null,
/// `count` is zero, or a panic is caught.
///
/// # Safety
/// idx must be valid, task_ids must point to count valid null-terminated strings
#[no_mangle]
pub unsafe extern "C" fn qi_remove_tasks(
    idx: *mut QiIndex,
    task_ids: *const *const c_char,
    count: u32,
) -> c_int {
    if idx.is_null() || task_ids.is_null() || count == 0 {
        return -1;
    }

    let outcome = std::panic::catch_unwind(|| {
        let handle = &*idx;
        let mut queue = handle.inner.lock().unwrap();

        let mut ids: Vec<String> = Vec::new();
        for &raw_id in std::slice::from_raw_parts(task_ids, count as usize) {
            if raw_id.is_null() {
                continue;
            }
            if let Ok(id) = CStr::from_ptr(raw_id).to_str() {
                ids.push(id.to_string());
            }
        }

        queue.remove_tasks(&ids) as c_int
    });

    outcome.unwrap_or(-1)
}
/// Retry a task
///
/// Forwards `task_id`, `next_retry_at` and `max_retries` to the index's
/// `retry_task`. Returns 0 when the index reports `Ok(true)`. Returns -1 when
/// an argument is null, `task_id` is not valid UTF-8, the index reports
/// `Ok(false)`, the index reports an error, or a panic is caught. An error's
/// message is stored for `qi_last_error`, matching `qi_add_tasks`; `Ok(false)`
/// leaves the stored error untouched.
///
/// # Safety
/// idx must be valid, task_id must be a valid null-terminated string
#[no_mangle]
pub unsafe extern "C" fn qi_retry_task(
    idx: *mut QiIndex,
    task_id: *const c_char,
    next_retry_at: i64,
    max_retries: u32,
) -> c_int {
    if idx.is_null() || task_id.is_null() {
        return -1;
    }
    let id_str = match CStr::from_ptr(task_id).to_str() {
        Ok(s) => s,
        Err(_) => return -1,
    };

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let mut inner = index.inner.lock().unwrap();
        match inner.retry_task(id_str, next_retry_at, max_retries) {
            Ok(true) => 0,
            Ok(false) => -1,
            Err(e) => {
                // Keep the reason instead of silently discarding it.
                if let Ok(mut slot) = index.last_error.lock() {
                    *slot = Some(e.to_string());
                }
                -1
            }
        }
    });

    result.unwrap_or(-1)
}
/// Move a task to DLQ
///
/// Forwards `task_id` and `reason` to the index's `move_to_dlq`. Returns 0
/// when the index reports `Ok(true)`. Returns -1 when an argument is null, a
/// string is not valid UTF-8, the index reports `Ok(false)`, the index reports
/// an error, or a panic is caught. An error's message is stored for
/// `qi_last_error`, matching `qi_add_tasks`; `Ok(false)` leaves the stored
/// error untouched.
///
/// # Safety
/// idx must be valid, task_id and reason must be valid null-terminated strings
#[no_mangle]
pub unsafe extern "C" fn qi_move_to_dlq(
    idx: *mut QiIndex,
    task_id: *const c_char,
    reason: *const c_char,
) -> c_int {
    if idx.is_null() || task_id.is_null() || reason.is_null() {
        return -1;
    }
    let id_str = match CStr::from_ptr(task_id).to_str() {
        Ok(s) => s,
        Err(_) => return -1,
    };
    let reason_str = match CStr::from_ptr(reason).to_str() {
        Ok(s) => s,
        Err(_) => return -1,
    };

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let mut inner = index.inner.lock().unwrap();
        match inner.move_to_dlq(id_str, reason_str) {
            Ok(true) => 0,
            Ok(false) => -1,
            Err(e) => {
                // Keep the reason instead of silently discarding it.
                if let Ok(mut slot) = index.last_error.lock() {
                    *slot = Some(e.to_string());
                }
                -1
            }
        }
    });

    result.unwrap_or(-1)
}
/// Renew lease for a task
///
/// Forwards `task_id`, `worker_id` and `lease_expiry` to the index's
/// `renew_lease`. Returns 0 when the index reports `Ok(true)`. Returns -1 when
/// an argument is null, a string is not valid UTF-8, the index reports
/// `Ok(false)`, the index reports an error, or a panic is caught. An error's
/// message is stored for `qi_last_error`, matching `qi_add_tasks`; `Ok(false)`
/// leaves the stored error untouched.
///
/// # Safety
/// idx must be valid, all string args must be valid null-terminated strings
#[no_mangle]
pub unsafe extern "C" fn qi_renew_lease(
    idx: *mut QiIndex,
    task_id: *const c_char,
    worker_id: *const c_char,
    lease_expiry: i64,
) -> c_int {
    if idx.is_null() || task_id.is_null() || worker_id.is_null() {
        return -1;
    }
    let id_str = match CStr::from_ptr(task_id).to_str() {
        Ok(s) => s,
        Err(_) => return -1,
    };
    let worker_str = match CStr::from_ptr(worker_id).to_str() {
        Ok(s) => s,
        Err(_) => return -1,
    };

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let mut inner = index.inner.lock().unwrap();
        match inner.renew_lease(id_str, worker_str, lease_expiry) {
            Ok(true) => 0,
            Ok(false) => -1,
            Err(e) => {
                // Keep the reason instead of silently discarding it.
                if let Ok(mut slot) = index.last_error.lock() {
                    *slot = Some(e.to_string());
                }
                -1
            }
        }
    });

    result.unwrap_or(-1)
}
/// Release lease for a task
///
/// Forwards `task_id` and `worker_id` to the index's `release_lease`. Returns
/// 0 when the index reports `Ok(true)`. Returns -1 when an argument is null, a
/// string is not valid UTF-8, the index reports `Ok(false)`, the index reports
/// an error, or a panic is caught. An error's message is stored for
/// `qi_last_error`, matching `qi_add_tasks`; `Ok(false)` leaves the stored
/// error untouched.
///
/// # Safety
/// idx must be valid, task_id and worker_id must be valid null-terminated strings
#[no_mangle]
pub unsafe extern "C" fn qi_release_lease(
    idx: *mut QiIndex,
    task_id: *const c_char,
    worker_id: *const c_char,
) -> c_int {
    if idx.is_null() || task_id.is_null() || worker_id.is_null() {
        return -1;
    }
    let id_str = match CStr::from_ptr(task_id).to_str() {
        Ok(s) => s,
        Err(_) => return -1,
    };
    let worker_str = match CStr::from_ptr(worker_id).to_str() {
        Ok(s) => s,
        Err(_) => return -1,
    };

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let mut inner = index.inner.lock().unwrap();
        match inner.release_lease(id_str, worker_str) {
            Ok(true) => 0,
            Ok(false) => -1,
            Err(e) => {
                // Keep the reason instead of silently discarding it.
                if let Ok(mut slot) = index.last_error.lock() {
                    *slot = Some(e.to_string());
                }
                -1
            }
        }
    });

    result.unwrap_or(-1)
}
/// Rebuild the index from storage
///
/// Returns 0 when the index's `rebuild_index` succeeds. Returns -1 when `idx`
/// is null, the rebuild reports an error, or a panic is caught. An error's
/// message is stored for `qi_last_error`, matching `qi_add_tasks`.
///
/// # Safety
/// idx must be valid
#[no_mangle]
pub unsafe extern "C" fn qi_rebuild_index(idx: *mut QiIndex) -> c_int {
    if idx.is_null() {
        return -1;
    }

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let mut inner = index.inner.lock().unwrap();
        match inner.rebuild_index() {
            Ok(_) => 0,
            Err(e) => {
                // Keep the reason instead of silently discarding it.
                if let Ok(mut slot) = index.last_error.lock() {
                    *slot = Some(e.to_string());
                }
                -1
            }
        }
    });

    result.unwrap_or(-1)
}
/// Compact the index storage
///
/// Returns 0 when the index's `compact_index` succeeds. Returns -1 when `idx`
/// is null, compaction reports an error, or a panic is caught. An error's
/// message is stored for `qi_last_error`, matching `qi_add_tasks`.
///
/// # Safety
/// idx must be valid
#[no_mangle]
pub unsafe extern "C" fn qi_compact_index(idx: *mut QiIndex) -> c_int {
    if idx.is_null() {
        return -1;
    }

    let result = std::panic::catch_unwind(|| {
        let index = &*idx;
        let mut inner = index.inner.lock().unwrap();
        match inner.compact_index() {
            Ok(_) => 0,
            Err(e) => {
                // Keep the reason instead of silently discarding it.
                if let Ok(mut slot) = index.last_error.lock() {
                    *slot = Some(e.to_string());
                }
                -1
            }
        }
    });

    result.unwrap_or(-1)
}
/// Releases a task array handed out by `qi_get_all_tasks` or `qi_get_tasks_by_status`.
///
/// `count` must be the element count that was returned together with the
/// array, because the allocation layout is rebuilt from it. A null pointer or
/// a zero count is ignored.
///
/// # Safety
/// tasks must be a valid pointer returned by qi_get_all_tasks, or null
#[no_mangle]
pub unsafe extern "C" fn qi_free_task_array(tasks: *mut QiTask, count: usize) {
    if tasks.is_null() || count == 0 {
        return;
    }
    let layout = std::alloc::Layout::array::<QiTask>(count).unwrap();
    std::alloc::dealloc(tasks.cast::<u8>(), layout);
}
/// Releases an array of `count` C strings together with the array itself.
///
/// Each non-null entry is reclaimed as a `CString`; afterwards the pointer
/// array is deallocated using the layout of `count` pointers. A null array or
/// a zero count is ignored.
///
/// # Safety
/// strings must be a valid pointer, or null
#[no_mangle]
pub unsafe extern "C" fn qi_free_string_array(strings: *mut *mut c_char, count: usize) {
    if strings.is_null() || count == 0 {
        return;
    }

    for position in 0..count {
        let entry = *strings.add(position);
        if !entry.is_null() {
            drop(CString::from_raw(entry));
        }
    }

    let layout = std::alloc::Layout::array::<*mut c_char>(count).unwrap();
    std::alloc::dealloc(strings.cast::<u8>(), layout);
}
/// Reports the index version.
///
/// Returns 0 for a null handle and the constant 1 otherwise; the handle is
/// only checked for null and never dereferenced.
///
/// # Safety
/// idx must be valid
#[no_mangle]
pub unsafe extern "C" fn qi_get_index_version(idx: *mut QiIndex) -> u64 {
    // Version 1 for now
    if idx.is_null() {
        0
    } else {
        1
    }
}
/// Reports the index modification time as a Unix timestamp in seconds.
///
/// Returns 0 for a null handle. For any other handle this is currently a
/// placeholder that returns the present UTC time rather than a stored
/// modification time; the handle is never dereferenced.
///
/// # Safety
/// idx must be valid
#[no_mangle]
pub unsafe extern "C" fn qi_get_index_mtime(idx: *mut QiIndex) -> i64 {
    if !idx.is_null() {
        // Return current time as placeholder
        chrono::Utc::now().timestamp()
    } else {
        0
    }
}