Security Fixes: - CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries() Prevents symlink attacks on predictable .tmp file paths - CVE-2025-47290: Use openat_nofollow() in storage_open() Closes TOCTOU race condition via path_sanitizer infrastructure - CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks() Prevents integer overflow in batch operations Research Trustworthiness (dataset_hash): - Deterministic file ordering: std::sort after collect_files() - Recursive directory traversal: depth-limited with cycle detection - Documented exclusions: hidden files and special files noted in API Bug Fixes: - R1: storage_init path validation for non-existent directories - R2: safe_strncpy return value check before strcat - R3: parallel_hash 256-file cap replaced with std::vector - R4: wire qi_compact_index/qi_rebuild_index stubs - R5: CompletionLatch race condition fix (hold mutex during decrement) - R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32) - R7: fuzz_index_storage header format fix - R8: enforce null termination in add_tasks/update_tasks - R9: use 64 bytes (not 65) in combined hash to exclude null terminator - R10: status field persistence in save() New Tests: - test_recursive_dataset.cpp: Verify deterministic recursive hashing - test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix - test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix - test_sha256_arm_kat.cpp: ARMv8 known-answer tests - test_storage_init_new_dir.cpp: F1 verification - test_parallel_hash_large_dir.cpp: F3 verification - test_queue_index_compact.cpp: F4 verification All 8 native tests passing. Library ready for research lab deployment.
148 lines
3.1 KiB
C++
148 lines
3.1 KiB
C++
#include "mmap_utils.h"
|
|
#include <sys/mman.h>
|
|
#include <sys/stat.h>
|
|
#include <fcntl.h>
|
|
#include <unistd.h>
|
|
#include <filesystem>
|
|
|
|
namespace fs = std::filesystem;
|
|
namespace fetchml::common {
|
|
|
|
// Destructor: gives up whatever this object still holds by delegating to
// unmap().
MemoryMap::~MemoryMap() {
    this->unmap();
}
|
|
|
|
// Move constructor: takes over other's mapping address, length, descriptor
// and writable flag.
//
// The source is left completely empty (null address, zero length, fd -1, not
// writable). Clearing size_ and writable_ as well as addr_/fd_ means a
// moved-from object never reports a stale length or writable flag for a
// mapping it no longer owns; with addr_ null and fd_ negative, a later
// unmap() on the source does nothing.
MemoryMap::MemoryMap(MemoryMap&& other) noexcept
    : addr_(other.addr_), size_(other.size_), fd_(other.fd_), writable_(other.writable_) {
    other.addr_ = nullptr;
    other.size_ = 0;
    other.fd_ = -1;
    other.writable_ = false;
}
|
|
|
|
// Move assignment: releases anything this object currently holds, then takes
// over other's mapping address, length, descriptor and writable flag.
//
// Self-assignment is a no-op. The source is left completely empty (null
// address, zero length, fd -1, not writable) so it never reports a stale
// length or writable flag for a mapping it no longer owns.
//
// Returns *this.
MemoryMap& MemoryMap::operator=(MemoryMap&& other) noexcept {
    if (this != &other) {
        unmap();  // drop our own mapping/descriptor before adopting other's

        addr_ = other.addr_;
        size_ = other.size_;
        fd_ = other.fd_;
        writable_ = other.writable_;

        other.addr_ = nullptr;
        other.size_ = 0;
        other.fd_ = -1;
        other.writable_ = false;
    }
    return *this;
}
|
|
|
|
void MemoryMap::unmap() {
|
|
if (addr_ && addr_ != reinterpret_cast<void*>(-1)) {
|
|
munmap(addr_, size_);
|
|
addr_ = nullptr;
|
|
}
|
|
if (fd_ >= 0) {
|
|
close(fd_);
|
|
fd_ = -1;
|
|
}
|
|
}
|
|
|
|
void MemoryMap::sync() {
|
|
if (addr_ && writable_) {
|
|
msync(addr_, size_, MS_SYNC);
|
|
}
|
|
}
|
|
|
|
std::optional<MemoryMap> MemoryMap::map_read(const char* path) {
|
|
int fd = ::open(path, O_RDONLY | O_CLOEXEC);
|
|
if (fd < 0) return std::nullopt;
|
|
|
|
struct stat st;
|
|
if (fstat(fd, &st) < 0) {
|
|
::close(fd);
|
|
return std::nullopt;
|
|
}
|
|
|
|
if (st.st_size == 0) {
|
|
::close(fd);
|
|
return MemoryMap(); // Empty file - valid but no mapping
|
|
}
|
|
|
|
void* addr = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
|
if (addr == MAP_FAILED) {
|
|
::close(fd);
|
|
return std::nullopt;
|
|
}
|
|
|
|
MemoryMap mm;
|
|
mm.addr_ = addr;
|
|
mm.size_ = st.st_size;
|
|
mm.fd_ = fd;
|
|
mm.writable_ = false;
|
|
return mm;
|
|
}
|
|
|
|
// Constructs a handle by immediately attempting open(path, flags, mode).
// The bool returned by open() is discarded here.
FileHandle::FileHandle(const char* path, int flags, int mode) {
    this->open(path, flags, mode);
}
|
|
|
|
// Destructor: closes the descriptor, if one is held, by delegating to close().
FileHandle::~FileHandle() {
    this->close();
}
|
|
|
|
FileHandle::FileHandle(FileHandle&& other) noexcept
|
|
: fd_(other.fd_), path_(std::move(other.path_)) {
|
|
other.fd_ = -1;
|
|
}
|
|
|
|
// Move assignment: closes the descriptor currently held (if any), then adopts
// other's descriptor and path and sets other's descriptor to -1.
// Self-assignment leaves the object untouched. Returns *this.
FileHandle& FileHandle::operator=(FileHandle&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    this->close();
    path_ = std::move(other.path_);
    fd_ = other.fd_;
    other.fd_ = -1;
    return *this;
}
|
|
|
|
bool FileHandle::open(const char* path, int flags, int mode) {
|
|
fd_ = ::open(path, flags | O_CLOEXEC, mode);
|
|
if (fd_ >= 0) {
|
|
path_ = path;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void FileHandle::close() {
|
|
if (fd_ >= 0) {
|
|
::close(fd_);
|
|
fd_ = -1;
|
|
}
|
|
}
|
|
|
|
ssize_t FileHandle::read(void* buf, size_t count, off_t offset) {
|
|
if (offset < 0) {
|
|
return ::read(fd_, buf, count);
|
|
}
|
|
return pread(fd_, buf, count, offset);
|
|
}
|
|
|
|
ssize_t FileHandle::write(const void* buf, size_t count, off_t offset) {
|
|
if (offset < 0) {
|
|
return ::write(fd_, buf, count);
|
|
}
|
|
return pwrite(fd_, buf, count, offset);
|
|
}
|
|
|
|
// Returns the st_size that stat() reports for `path`, or -1 when stat() fails
// (for example because the path does not exist).
int64_t file_size(const char* path) {
    struct stat info;
    const int rc = ::stat(path, &info);
    return rc < 0 ? int64_t{-1} : static_cast<int64_t>(info.st_size);
}
|
|
|
|
// Makes sure the directory `path` exists, creating any missing parent
// directories along the way.
//
// Returns true when create_directories() completes without throwing (this
// includes the case where the directory already existed), and false when
// `path` is null or create_directories() throws.
bool ensure_dir(const char* path) {
    // Building a std::filesystem::path from a null pointer is undefined
    // behaviour rather than an exception, so catch (...) below cannot cover it.
    if (path == nullptr) {
        return false;
    }

    try {
        std::filesystem::create_directories(path);
        return true;
    } catch (...) {
        // Best-effort API: every failure is reported as a plain false.
        return false;
    }
}
|
|
|
|
} // namespace fetchml::common
|