fetch_ml/native/rust/queue_index/src/storage.rs
Jeremie Fraeys 7efefa1933
feat(native): implement Rust native layer as a test
- queue_index: mmap-based priority queue with safe storage wrapper
- dataset_hash: BLAKE3 parallel hashing with rayon
- common: FFI utilities with panic recovery
- Minimal deps: ~20 total (rayon, blake3, memmap2, walkdir, chrono)
- Drop crossbeam, prometheus - use stdlib + manual metrics
- Makefile: cargo build targets, help text updated
- Forgejo CI: clippy, tests, miri, cargo-deny
- C FFI compatible with existing Go bindings
2026-03-14 17:45:58 -04:00

202 lines
5.2 KiB
Rust

//! Memory-mapped storage with safe access patterns
//!
//! Design: Unsafe raw pointers are contained within RawStorage.
//! All public access goes through IndexStorage with safe methods.
use memmap2::{MmapMut, MmapOptions};
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, MutexGuard};
/// Header stored at the beginning of the mmap file
///
/// The struct is `#[repr(C)]` and consists only of 8-byte integer fields and
/// an 8-byte array, so its in-memory layout is fixed and contains no padding.
/// The field order is part of the file format: do not reorder fields.
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct IndexHeader {
    /// Format version; [`IndexHeader::VERSION`] for headers built by `new`.
    pub version: u64,
    /// File signature; [`IndexHeader::MAGIC`] for headers built by `new`.
    pub magic: [u8; 8],
    /// Entry counter; initialized to 0 by `new`.
    pub entry_count: u64,
    /// Initialized to 0 by `new`.
    /// NOTE(review): presumably a timestamp; the unit is not established here.
    pub last_modified: i64,
    /// Initialized to 0 by `new`.
    /// NOTE(review): what this checksum covers is not visible here; it is not
    /// consulted by `is_valid`.
    pub checksum: u64,
}

impl IndexHeader {
    /// Signature bytes identifying an index file.
    pub const MAGIC: [u8; 8] = *b"FETCHIDX";
    /// The only format version accepted by [`IndexHeader::is_valid`].
    pub const VERSION: u64 = 1;
    /// Size of the header in bytes.
    pub const SIZE: usize = std::mem::size_of::<IndexHeader>();

    /// Build a header for an empty index: current magic and version, every
    /// other field zeroed.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            version: Self::VERSION,
            magic: Self::MAGIC,
            entry_count: 0,
            last_modified: 0,
            checksum: 0,
        }
    }

    /// Returns `true` when both the magic bytes and the version match the
    /// values this build expects. No other field is inspected.
    #[must_use]
    pub fn is_valid(&self) -> bool {
        self.magic == Self::MAGIC && self.version == Self::VERSION
    }
}

impl Default for IndexHeader {
    /// Same as [`IndexHeader::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Internal unsafe state - never exposed directly
///
/// Holds the writable mapping and hands out references to the
/// [`IndexHeader`] located at its first byte. The header pointer is derived
/// from the mapping on every access instead of being cached in a field:
///
/// * a mutable reference is then always obtained through `as_mut_ptr`, never
///   by casting away the constness of a pointer taken from a shared borrow;
/// * the struct contains no raw-pointer field, so it does not lose the
///   `Send`/`Sync` auto traits because of one.
struct RawStorage {
    mmap: MmapMut,
    /// Byte offset of the first byte after the header.
    /// Not read anywhere in this file.
    data_offset: usize,
}

impl RawStorage {
    /// Wrap a mapping after checking that a header can be read from it.
    ///
    /// # Errors
    ///
    /// Returns `InvalidData` when the mapping is shorter than
    /// [`IndexHeader::SIZE`] or when its start address is not suitably
    /// aligned for `IndexHeader`.
    fn new(mmap: MmapMut) -> io::Result<Self> {
        if mmap.len() < IndexHeader::SIZE {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Index mapping is smaller than the index header",
            ));
        }
        let start = mmap.as_ptr() as usize;
        if start % std::mem::align_of::<IndexHeader>() != 0 {
            return Err(io::Error::new(
                io::ErrorKind::InvalidData,
                "Index mapping is not aligned for the index header",
            ));
        }
        Ok(Self {
            mmap,
            data_offset: IndexHeader::SIZE,
        })
    }

    /// Shared view of the header at the start of the mapping.
    fn header(&self) -> &IndexHeader {
        // SAFETY: `new` verified that the mapping holds at least
        // `IndexHeader::SIZE` bytes and starts at an address aligned for
        // `IndexHeader`. The header consists solely of integers and a byte
        // array, so every bit pattern is a valid value. The returned borrow is
        // tied to `&self`, which keeps the mapping alive.
        unsafe { &*self.mmap.as_ptr().cast::<IndexHeader>() }
    }

    /// Exclusive view of the header at the start of the mapping.
    fn header_mut(&mut self) -> &mut IndexHeader {
        // SAFETY: same size/alignment/validity argument as in `header`. The
        // pointer comes from `as_mut_ptr`, so writing through it is permitted,
        // and `&mut self` guarantees no other reference into the mapping is
        // handed out by this type at the same time.
        unsafe { &mut *self.mmap.as_mut_ptr().cast::<IndexHeader>() }
    }
}
/// Public safe interface to mmap storage
pub struct IndexStorage {
raw: RawStorage,
path: PathBuf,
}
impl IndexStorage {
/// Open or create storage at the given path
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
let path = path.as_ref().to_path_buf();
std::fs::create_dir_all(&path)?;
let file_path = path.join("index.dat");
let file = OpenOptions::new()
.read(true)
.write(true)
.create(true)
.open(&file_path)?;
// Ensure file is at least header size
let file_size = file.metadata()?.len() as usize;
let min_size = IndexHeader::SIZE;
if file_size < min_size {
let header = IndexHeader::new();
let header_bytes = unsafe {
std::slice::from_raw_parts(
&header as *const IndexHeader as *const u8,
IndexHeader::SIZE,
)
};
file.set_len(min_size as u64)?;
file.write_all_at(header_bytes, 0)?;
}
let mmap = unsafe { MmapOptions::new().map_mut(&file)? };
let raw = RawStorage::new(mmap)?;
Ok(Self { raw, path })
}
/// Get a reference to the header (read-only)
pub fn header(&self) -> &IndexHeader {
self.raw.header()
}
/// Get a mutable reference to the header
pub fn header_mut(&mut self) -> &mut IndexHeader {
self.raw.header_mut()
}
/// Verify header magic and version
pub fn verify(&self) -> io::Result<()> {
let header = self.header();
if !header.is_valid() {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
"Invalid index header (wrong magic or version)",
));
}
Ok(())
}
/// Get the path to the storage directory
pub fn path(&self) -> &Path {
&self.path
}
/// Flush changes to disk
pub fn flush(&mut self) -> io::Result<()> {
self.raw.mmap.flush()
}
}
/// Thread-safe wrapper for concurrent access
///
/// A cheaply cloneable handle: every clone refers to the same
/// [`IndexStorage`], and all access is serialized through one mutex.
///
/// NOTE(review): the handle can only cross threads if `IndexStorage` is
/// `Send` (a `Mutex<T>` is `Send`/`Sync` only when `T: Send`) - confirm that
/// holds for the storage internals.
#[derive(Clone)]
pub struct SharedStorage {
    inner: Arc<Mutex<IndexStorage>>,
}

impl SharedStorage {
    /// Open or create the storage at `path` and wrap it for shared use.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`IndexStorage::open`].
    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
        IndexStorage::open(path).map(|storage| Self {
            inner: Arc::new(Mutex::new(storage)),
        })
    }

    /// Acquire exclusive access to the storage, blocking until the lock is
    /// available. The lock is released when the returned guard is dropped.
    ///
    /// # Panics
    ///
    /// Panics if the mutex is poisoned, i.e. another holder panicked while it
    /// had the lock; the storage may then be in a half-updated state.
    pub fn lock(&self) -> MutexGuard<'_, IndexStorage> {
        self.inner.lock().expect("index storage mutex poisoned")
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Opening storage in an empty directory produces a valid header.
    #[test]
    fn test_storage_creation() {
        let dir = TempDir::new().unwrap();
        let opened = IndexStorage::open(dir.path()).unwrap();
        assert!(opened.header().is_valid());
    }

    /// `verify` accepts the header of freshly created storage.
    #[test]
    fn test_storage_verify() {
        let dir = TempDir::new().unwrap();
        let opened = IndexStorage::open(dir.path()).unwrap();
        assert!(opened.verify().is_ok());
    }

    /// A header write made under one lock acquisition is visible under the
    /// next one.
    #[test]
    fn test_shared_storage() {
        let dir = TempDir::new().unwrap();
        let handle = SharedStorage::open(dir.path()).unwrap();

        // Each statement takes the lock through a temporary guard that is
        // dropped at the end of that statement, so the lock is acquired and
        // released three separate times.
        assert!(handle.lock().header().is_valid());
        handle.lock().header_mut().entry_count = 42;
        assert_eq!(handle.lock().header().entry_count, 42);
    }
}