//! Memory-mapped storage with safe access patterns //! //! Design: Unsafe raw pointers are contained within RawStorage. //! All public access goes through IndexStorage with safe methods. use memmap2::{MmapMut, MmapOptions}; use std::fs::{File, OpenOptions}; use std::io::{self, Write}; use std::path::{Path, PathBuf}; use std::sync::Arc; /// Header stored at the beginning of the mmap file #[repr(C)] #[derive(Debug, Clone, Copy)] pub struct IndexHeader { pub version: u64, pub magic: [u8; 8], pub entry_count: u64, pub last_modified: i64, pub checksum: u64, } impl IndexHeader { pub const MAGIC: [u8; 8] = *b"FETCHIDX"; pub const VERSION: u64 = 1; pub const SIZE: usize = std::mem::size_of::(); pub fn new() -> Self { Self { version: Self::VERSION, magic: Self::MAGIC, entry_count: 0, last_modified: 0, checksum: 0, } } pub fn is_valid(&self) -> bool { self.magic == Self::MAGIC && self.version == Self::VERSION } } /// Internal unsafe state - never exposed directly struct RawStorage { mmap: MmapMut, header_ptr: *mut IndexHeader, data_offset: usize, } impl RawStorage { fn new(mmap: MmapMut) -> io::Result { let ptr = mmap.as_ptr() as *mut u8; let header_ptr = ptr as *mut IndexHeader; Ok(Self { mmap, header_ptr, data_offset: IndexHeader::SIZE, }) } fn header(&self) -> &IndexHeader { unsafe { &*self.header_ptr } } fn header_mut(&mut self) -> &mut IndexHeader { unsafe { &mut *self.header_ptr } } } /// Public safe interface to mmap storage pub struct IndexStorage { raw: RawStorage, path: PathBuf, } impl IndexStorage { /// Open or create storage at the given path pub fn open>(path: P) -> io::Result { let path = path.as_ref().to_path_buf(); std::fs::create_dir_all(&path)?; let file_path = path.join("index.dat"); let file = OpenOptions::new() .read(true) .write(true) .create(true) .open(&file_path)?; // Ensure file is at least header size let file_size = file.metadata()?.len() as usize; let min_size = IndexHeader::SIZE; if file_size < min_size { let header = IndexHeader::new(); let header_bytes = unsafe { std::slice::from_raw_parts( &header as *const IndexHeader as *const u8, IndexHeader::SIZE, ) }; file.set_len(min_size as u64)?; file.write_all_at(header_bytes, 0)?; } let mmap = unsafe { MmapOptions::new().map_mut(&file)? }; let raw = RawStorage::new(mmap)?; Ok(Self { raw, path }) } /// Get a reference to the header (read-only) pub fn header(&self) -> &IndexHeader { self.raw.header() } /// Get a mutable reference to the header pub fn header_mut(&mut self) -> &mut IndexHeader { self.raw.header_mut() } /// Verify header magic and version pub fn verify(&self) -> io::Result<()> { let header = self.header(); if !header.is_valid() { return Err(io::Error::new( io::ErrorKind::InvalidData, "Invalid index header (wrong magic or version)", )); } Ok(()) } /// Get the path to the storage directory pub fn path(&self) -> &Path { &self.path } /// Flush changes to disk pub fn flush(&mut self) -> io::Result<()> { self.raw.mmap.flush() } } /// Thread-safe wrapper for concurrent access pub struct SharedStorage { inner: Arc>, } impl SharedStorage { pub fn open>(path: P) -> io::Result { let storage = IndexStorage::open(path)?; Ok(Self { inner: Arc::new(Mutex::new(storage)), }) } pub fn lock(&self) -> MutexGuard { self.inner.lock().unwrap() } } #[cfg(test)] mod tests { use super::*; use tempfile::TempDir; #[test] fn test_storage_creation() { let temp = TempDir::new().unwrap(); let storage = IndexStorage::open(temp.path()).unwrap(); assert!(storage.header().is_valid()); } #[test] fn test_storage_verify() { let temp = TempDir::new().unwrap(); let storage = IndexStorage::open(temp.path()).unwrap(); assert!(storage.verify().is_ok()); } #[test] fn test_shared_storage() { let temp = TempDir::new().unwrap(); let shared = SharedStorage::open(temp.path()).unwrap(); { let storage = shared.lock(); assert!(storage.header().is_valid()); } { let mut storage = shared.lock(); storage.header_mut().entry_count = 42; } { let storage = shared.lock(); assert_eq!(storage.header().entry_count, 42); } } }