- Add arena allocator for zero-allocation hot paths - Add thread pool for parallel operations - Add mmap utilities for memory-mapped I/O - Implement queue_index with heap-based priority queue - Implement dataset_hash with SIMD support (SHA-NI, ARMv8) - Add runtime SIMD detection for cross-platform correctness - Add comprehensive tests and benchmarks
53 lines
1.3 KiB
C++
53 lines
1.3 KiB
C++
#pragma once
|
|
#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>
|
|
|
|
// Fixed-size thread pool for parallel operations.
// Minimizes thread creation overhead for batch operations.
//
// Only the declaration lives in this header; the member functions are defined
// elsewhere. The pool is neither copyable nor movable: its mutex,
// condition variable and thread members cannot be copied, and the
// user-declared destructor already suppressed the implicit move operations.
// The deleted special members below simply make that contract explicit.
class ThreadPool {
    // NOTE: member order is kept as-is because it fixes the construction and
    // destruction order of the members.
    std::vector<std::thread> workers_;         // presumably the pool's worker threads -- confirm in the implementation
    std::queue<std::function<void()>> tasks_;  // queued tasks, stored as type-erased callables
    std::mutex queue_mutex_;                   // presumably guards tasks_ and stop_ -- confirm in the implementation
    std::condition_variable condition_;        // presumably signals workers about queue/stop changes -- confirm
    bool stop_ = false;                        // starts false; how it is used is not visible from this header

public:
    // Constructs the pool from a requested thread count. `explicit` prevents
    // an integer from silently converting into a ThreadPool.
    explicit ThreadPool(size_t num_threads);
    ~ThreadPool();

    // Rule of Five: a destructor is declared, so state the rest explicitly.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;
    ThreadPool(ThreadPool&&) = delete;
    ThreadPool& operator=(ThreadPool&&) = delete;

    // Add task to queue. Thread-safe.
    void enqueue(std::function<void()> task);

    // Wait for all queued tasks to complete.
    void wait_all();

    // Get optimal thread count (capped at 8 for I/O bound work).
    static uint32_t default_thread_count();
};
|
|
|
|
// Synchronization primitive: wait for N completions.
//
// Constructed with the number of expected completions. Each arrive() consumes
// one of them and wait() blocks until none remain. A latch constructed with 0
// is already complete, so wait() returns immediately.
class CompletionLatch {
    size_t count_{0};              // completions still outstanding; only touched while holding mutex_
    std::mutex mutex_;             // guards count_ and orders arrive() against wait()
    std::condition_variable cv_;   // signalled when count_ reaches zero

public:
    // total: number of arrive() calls required before wait() returns.
    explicit CompletionLatch(size_t total) : count_(total) {}

    // Records one completion and wakes all waiters when it was the last one.
    //
    // The decrement happens while holding mutex_. If it were done outside the
    // lock, a waiter could observe zero, return from wait() and destroy the
    // latch before this thread got around to locking mutex_.
    //
    // Calls made after the count has already reached zero are ignored, so an
    // extra arrive() cannot wrap the unsigned counter and re-arm the latch.
    void arrive() {
        std::lock_guard<std::mutex> lock(mutex_);
        if (count_ == 0) {
            return;
        }
        --count_;
        if (count_ == 0) {
            cv_.notify_all();
        }
    }

    // Blocks the caller until the outstanding count is zero. The predicate
    // form of wait() re-checks the count after every wakeup, so spurious
    // wakeups are harmless.
    void wait() {
        std::unique_lock<std::mutex> lock(mutex_);
        cv_.wait(lock, [this] { return count_ == 0; });
    }
};
|