- queue_index: mmap-based priority queue with safe storage wrapper
- dataset_hash: BLAKE3 parallel hashing with rayon
- common: FFI utilities with panic recovery
- Minimal deps: ~20 total (rayon, blake3, memmap2, walkdir, chrono)
- Drop crossbeam, prometheus - use stdlib + manual metrics
- Makefile: cargo build targets, help text updated
- Forgejo CI: clippy, tests, miri, cargo-deny
- C FFI compatible with existing Go bindings
202 lines
5.2 KiB
Rust
202 lines
5.2 KiB
Rust
//! Memory-mapped storage with safe access patterns
|
|
//!
|
|
//! Design: Unsafe raw pointers are contained within RawStorage.
|
|
//! All public access goes through IndexStorage with safe methods.
|
|
|
|
use memmap2::{MmapMut, MmapOptions};
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, MutexGuard};
|
|
|
|
/// Header stored at the beginning of the mmap file.
///
/// `#[repr(C)]` pins the field order so the in-memory layout can be used
/// directly as the on-disk layout. Every field is 8 bytes wide, so the struct
/// contains no padding.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct IndexHeader {
    /// Format version; compared against [`IndexHeader::VERSION`] by [`IndexHeader::is_valid`].
    pub version: u64,
    /// File signature; compared against [`IndexHeader::MAGIC`] by [`IndexHeader::is_valid`].
    pub magic: [u8; 8],
    /// Entry counter. Starts at 0; this type never updates it itself.
    pub entry_count: u64,
    /// Last-modification stamp. Starts at 0; the unit is not defined by this type.
    pub last_modified: i64,
    /// Checksum slot. Starts at 0; this type neither computes nor checks it.
    pub checksum: u64,
}

impl IndexHeader {
    /// Signature bytes expected in [`IndexHeader::magic`].
    pub const MAGIC: [u8; 8] = *b"FETCHIDX";
    /// The only format version accepted by [`IndexHeader::is_valid`].
    pub const VERSION: u64 = 1;
    /// Size of the header in bytes.
    pub const SIZE: usize = std::mem::size_of::<IndexHeader>();

    /// Creates a header for an empty index: current magic and version, all
    /// other fields zero.
    pub fn new() -> Self {
        Self {
            version: Self::VERSION,
            magic: Self::MAGIC,
            entry_count: 0,
            last_modified: 0,
            checksum: 0,
        }
    }

    /// Returns `true` when both the magic bytes and the version match the
    /// values this build expects. No other field is inspected.
    pub fn is_valid(&self) -> bool {
        self.magic == Self::MAGIC && self.version == Self::VERSION
    }
}

impl Default for IndexHeader {
    /// Same as [`IndexHeader::new`]. A derived `Default` would zero the magic
    /// and version and produce an invalid header, so it is written by hand.
    fn default() -> Self {
        Self::new()
    }
}
|
|
|
|
/// Internal unsafe state - never exposed directly
|
|
struct RawStorage {
|
|
mmap: MmapMut,
|
|
header_ptr: *mut IndexHeader,
|
|
data_offset: usize,
|
|
}
|
|
|
|
impl RawStorage {
|
|
fn new(mmap: MmapMut) -> io::Result<Self> {
|
|
let ptr = mmap.as_ptr() as *mut u8;
|
|
let header_ptr = ptr as *mut IndexHeader;
|
|
|
|
Ok(Self {
|
|
mmap,
|
|
header_ptr,
|
|
data_offset: IndexHeader::SIZE,
|
|
})
|
|
}
|
|
|
|
fn header(&self) -> &IndexHeader {
|
|
unsafe { &*self.header_ptr }
|
|
}
|
|
|
|
fn header_mut(&mut self) -> &mut IndexHeader {
|
|
unsafe { &mut *self.header_ptr }
|
|
}
|
|
}
|
|
|
|
/// Public safe interface to mmap storage
|
|
pub struct IndexStorage {
|
|
raw: RawStorage,
|
|
path: PathBuf,
|
|
}
|
|
|
|
impl IndexStorage {
|
|
/// Open or create storage at the given path
|
|
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
|
|
let path = path.as_ref().to_path_buf();
|
|
std::fs::create_dir_all(&path)?;
|
|
|
|
let file_path = path.join("index.dat");
|
|
let file = OpenOptions::new()
|
|
.read(true)
|
|
.write(true)
|
|
.create(true)
|
|
.open(&file_path)?;
|
|
|
|
// Ensure file is at least header size
|
|
let file_size = file.metadata()?.len() as usize;
|
|
let min_size = IndexHeader::SIZE;
|
|
|
|
if file_size < min_size {
|
|
let header = IndexHeader::new();
|
|
let header_bytes = unsafe {
|
|
std::slice::from_raw_parts(
|
|
&header as *const IndexHeader as *const u8,
|
|
IndexHeader::SIZE,
|
|
)
|
|
};
|
|
file.set_len(min_size as u64)?;
|
|
file.write_all_at(header_bytes, 0)?;
|
|
}
|
|
|
|
let mmap = unsafe { MmapOptions::new().map_mut(&file)? };
|
|
let raw = RawStorage::new(mmap)?;
|
|
|
|
Ok(Self { raw, path })
|
|
}
|
|
|
|
/// Get a reference to the header (read-only)
|
|
pub fn header(&self) -> &IndexHeader {
|
|
self.raw.header()
|
|
}
|
|
|
|
/// Get a mutable reference to the header
|
|
pub fn header_mut(&mut self) -> &mut IndexHeader {
|
|
self.raw.header_mut()
|
|
}
|
|
|
|
/// Verify header magic and version
|
|
pub fn verify(&self) -> io::Result<()> {
|
|
let header = self.header();
|
|
if !header.is_valid() {
|
|
return Err(io::Error::new(
|
|
io::ErrorKind::InvalidData,
|
|
"Invalid index header (wrong magic or version)",
|
|
));
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
/// Get the path to the storage directory
|
|
pub fn path(&self) -> &Path {
|
|
&self.path
|
|
}
|
|
|
|
/// Flush changes to disk
|
|
pub fn flush(&mut self) -> io::Result<()> {
|
|
self.raw.mmap.flush()
|
|
}
|
|
}
|
|
|
|
/// Thread-safe wrapper for concurrent access
|
|
pub struct SharedStorage {
|
|
inner: Arc<Mutex<IndexStorage>>,
|
|
}
|
|
|
|
impl SharedStorage {
|
|
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
|
|
let storage = IndexStorage::open(path)?;
|
|
Ok(Self {
|
|
inner: Arc::new(Mutex::new(storage)),
|
|
})
|
|
}
|
|
|
|
pub fn lock(&self) -> MutexGuard<IndexStorage> {
|
|
self.inner.lock().unwrap()
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Opening an empty directory produces an index with a valid header.
    #[test]
    fn test_storage_creation() {
        let dir = TempDir::new().unwrap();
        let index = IndexStorage::open(dir.path()).unwrap();
        assert!(index.header().is_valid());
    }

    /// `verify` accepts a freshly created index.
    #[test]
    fn test_storage_verify() {
        let dir = TempDir::new().unwrap();
        let index = IndexStorage::open(dir.path()).unwrap();
        assert!(index.verify().is_ok());
    }

    /// A header write made under one lock is visible under the next.
    #[test]
    fn test_shared_storage() {
        let dir = TempDir::new().unwrap();
        let shared = SharedStorage::open(dir.path()).unwrap();

        // Each guard is a temporary released at the end of its statement, so
        // the three steps lock and unlock in sequence.
        assert!(shared.lock().header().is_valid());
        shared.lock().header_mut().entry_count = 42;
        assert_eq!(shared.lock().header().entry_count, 42);
    }
}
|