fetch_ml/native/common/src/mmap_utils.cpp
Jeremie Fraeys 7efe8bbfbf
native: security hardening, research trustworthiness, and CVE mitigations
Security Fixes:
- CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries()
  Prevents symlink attacks on predictable .tmp file paths
- CVE-2025-47290: Use openat_nofollow() in storage_open()
  Closes TOCTOU race condition via path_sanitizer infrastructure
- CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks()
  Prevents integer overflow in batch operations

Research Trustworthiness (dataset_hash):
- Deterministic file ordering: std::sort after collect_files()
- Recursive directory traversal: depth-limited with cycle detection
- Documented exclusions: hidden files and special files noted in API

Bug Fixes:
- R1: storage_init path validation for non-existent directories
- R2: safe_strncpy return value check before strcat
- R3: parallel_hash 256-file cap replaced with std::vector
- R4: wire qi_compact_index/qi_rebuild_index stubs
- R5: CompletionLatch race condition fix (hold mutex during decrement)
- R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32)
- R7: fuzz_index_storage header format fix
- R8: enforce null termination in add_tasks/update_tasks
- R9: use 64 bytes (not 65) in combined hash to exclude null terminator
- R10: status field persistence in save()

New Tests:
- test_recursive_dataset.cpp: Verify deterministic recursive hashing
- test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix
- test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix
- test_sha256_arm_kat.cpp: ARMv8 known-answer tests
- test_storage_init_new_dir.cpp: F1 verification
- test_parallel_hash_large_dir.cpp: F3 verification
- test_queue_index_compact.cpp: F4 verification

All 8 native tests passing. Library ready for research lab deployment.
2026-02-21 13:33:45 -05:00

148 lines
3.1 KiB
C++

#include "mmap_utils.h"
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace fetchml::common {
// Destructor: releases whatever this object still holds by delegating to
// unmap(), which unmaps the region and closes the descriptor if present.
MemoryMap::~MemoryMap() {
    this->unmap();
}
// Move constructor: takes over the mapping address, length, descriptor and
// writable flag from `other`.
//
// The source is left fully empty (null address, zero size, fd -1, not
// writable) so that it neither unmaps/closes the transferred resources in its
// destructor nor reports a stale length for a mapping it no longer owns.
MemoryMap::MemoryMap(MemoryMap&& other) noexcept
    : addr_(other.addr_), size_(other.size_), fd_(other.fd_), writable_(other.writable_) {
    other.addr_ = nullptr;
    other.size_ = 0;
    other.fd_ = -1;
    other.writable_ = false;
}
// Move assignment: releases the mapping currently held by *this (via
// unmap()), then takes over the address, length, descriptor and writable flag
// from `other`. Self-assignment is a no-op.
//
// The source is left fully empty (null address, zero size, fd -1, not
// writable) so it does not report a stale length for a mapping it no longer
// owns and will not release the transferred resources when destroyed.
MemoryMap& MemoryMap::operator=(MemoryMap&& other) noexcept {
    if (this != &other) {
        unmap();  // drop our own mapping/descriptor before overwriting them

        addr_ = other.addr_;
        size_ = other.size_;
        fd_ = other.fd_;
        writable_ = other.writable_;

        other.addr_ = nullptr;
        other.size_ = 0;
        other.fd_ = -1;
        other.writable_ = false;
    }
    return *this;
}
// Releases the mapping and the underlying file descriptor, if any.
//
// Safe to call repeatedly: after the first call the object holds a null
// address, zero size and fd -1, so later calls do nothing. The return values
// of munmap() and close() are not inspected.
void MemoryMap::unmap() {
    // Only a real mapping is handed to munmap(); a null address or the
    // MAP_FAILED sentinel is simply discarded.
    if (addr_ && addr_ != MAP_FAILED) {
        munmap(addr_, size_);
    }
    // Always reset both fields so the object never reports a stale length
    // (or keeps a MAP_FAILED sentinel) after it has been unmapped.
    addr_ = nullptr;
    size_ = 0;

    if (fd_ >= 0) {
        close(fd_);
        fd_ = -1;
    }
}
// Flushes the mapped range with a blocking msync(MS_SYNC).
//
// Does nothing when there is no mapping or when the mapping is not marked
// writable. The result of msync() is not inspected.
void MemoryMap::sync() {
    const bool nothing_to_flush = !addr_ || !writable_;
    if (nothing_to_flush) {
        return;
    }
    msync(addr_, size_, MS_SYNC);
}
std::optional<MemoryMap> MemoryMap::map_read(const char* path) {
int fd = ::open(path, O_RDONLY | O_CLOEXEC);
if (fd < 0) return std::nullopt;
struct stat st;
if (fstat(fd, &st) < 0) {
::close(fd);
return std::nullopt;
}
if (st.st_size == 0) {
::close(fd);
return MemoryMap(); // Empty file - valid but no mapping
}
void* addr = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (addr == MAP_FAILED) {
::close(fd);
return std::nullopt;
}
MemoryMap mm;
mm.addr_ = addr;
mm.size_ = st.st_size;
mm.fd_ = fd;
mm.writable_ = false;
return mm;
}
// Constructs a handle by forwarding straight to open(path, flags, mode).
// The result of open() is not checked here, so a failed open is not reported
// by the constructor itself.
FileHandle::FileHandle(const char* path, int flags, int mode) {
    this->open(path, flags, mode);
}
// Destructor: closes the descriptor, if one is held, via close().
FileHandle::~FileHandle() {
    this->close();
}
FileHandle::FileHandle(FileHandle&& other) noexcept
: fd_(other.fd_), path_(std::move(other.path_)) {
other.fd_ = -1;
}
// Move assignment: closes the descriptor currently held, then adopts the
// descriptor and stored path of `other`, leaving `other` with fd -1.
// Self-assignment leaves the object untouched.
FileHandle& FileHandle::operator=(FileHandle&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    close();  // release our own descriptor before taking over the new one
    path_ = std::move(other.path_);
    fd_ = other.fd_;
    other.fd_ = -1;
    return *this;
}
bool FileHandle::open(const char* path, int flags, int mode) {
fd_ = ::open(path, flags | O_CLOEXEC, mode);
if (fd_ >= 0) {
path_ = path;
return true;
}
return false;
}
// Closes the held descriptor and marks the handle as empty (fd -1).
// Does nothing when no descriptor is held, so repeated calls are harmless.
// The result of ::close() is not inspected.
void FileHandle::close() {
    if (fd_ < 0) {
        return;
    }
    ::close(fd_);
    fd_ = -1;
}
// Reads up to `count` bytes into `buf`.
//
// A negative `offset` selects ::read(), which uses the descriptor's current
// file position; a non-negative `offset` selects pread() at that offset.
// Returns whatever the underlying call returns.
ssize_t FileHandle::read(void* buf, size_t count, off_t offset) {
    const bool positional = offset >= 0;
    return positional ? pread(fd_, buf, count, offset)
                      : ::read(fd_, buf, count);
}
// Writes up to `count` bytes from `buf`.
//
// A negative `offset` selects ::write(), which uses the descriptor's current
// file position; a non-negative `offset` selects pwrite() at that offset.
// Returns whatever the underlying call returns.
ssize_t FileHandle::write(const void* buf, size_t count, off_t offset) {
    const bool positional = offset >= 0;
    return positional ? pwrite(fd_, buf, count, offset)
                      : ::write(fd_, buf, count);
}
// Returns the st_size reported by stat() for `path`, or -1 if stat() fails
// (for example when the path does not exist).
int64_t file_size(const char* path) {
    struct stat info;
    const int rc = stat(path, &info);
    return rc < 0 ? static_cast<int64_t>(-1) : static_cast<int64_t>(info.st_size);
}
// Creates the directory `path`, including any missing parent directories.
//
// Returns true when std::filesystem::create_directories() completes without
// throwing (this includes the case where the directory already exists).
// Returns false when `path` is null or empty, or when the call throws for
// any reason; no exception escapes this function.
bool ensure_dir(const char* path) {
    // Building a filesystem path from a null pointer is undefined behaviour
    // and cannot be caught below, so reject it (and the empty string, which
    // names no directory) up front.
    if (path == nullptr || path[0] == '\0') {
        return false;
    }
    try {
        std::filesystem::create_directories(path);
        return true;
    } catch (...) {
        return false;
    }
}
} // namespace fetchml::common