fetch_ml/native/common/include/thread_pool.h
Jeremie Fraeys 43d241c28d
feat: implement C++ native libraries for performance-critical operations
- Add arena allocator for zero-allocation hot paths
- Add thread pool for parallel operations
- Add mmap utilities for memory-mapped I/O
- Implement queue_index with heap-based priority queue
- Implement dataset_hash with SIMD support (SHA-NI, ARMv8)
- Add runtime SIMD detection for cross-platform correctness
- Add comprehensive tests and benchmarks
2026-02-16 20:38:04 -05:00

53 lines
1.3 KiB
C++

#pragma once

#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>
// Fixed-size thread pool for parallel operations.
// Minimizes thread creation overhead for batch operations.
//
// Declaration only: the constructor/destructor and member functions below
// are defined out of line — presumably in thread_pool.cpp; confirm there.
class ThreadPool {
// Worker threads; created once at construction, joined at destruction.
std::vector<std::thread> workers_;
// Pending tasks consumed by workers in FIFO order. Guarded by queue_mutex_.
std::queue<std::function<void()>> tasks_;
// Protects tasks_ and stop_.
std::mutex queue_mutex_;
// Signals workers when a task is enqueued or shutdown begins.
std::condition_variable condition_;
// Set during shutdown so idle workers wake up and exit.
// NOTE(review): assumed to be read/written under queue_mutex_ — verify in the .cpp.
bool stop_ = false;
public:
// Spawns num_threads worker threads that service the task queue.
explicit ThreadPool(size_t num_threads);
// Presumably sets stop_, wakes all workers, and joins them — verify in the .cpp.
~ThreadPool();
// Add task to queue. Thread-safe.
void enqueue(std::function<void()> task);
// Wait for all queued tasks to complete
void wait_all();
// Get optimal thread count (capped at 8 for I/O bound work)
static uint32_t default_thread_count();
};
// Synchronization primitive: wait for N completions.
// Single-use, count-down latch: construct with the expected number of
// arrive() calls; wait() blocks until that many have happened.
class CompletionLatch {
  std::atomic<size_t> remaining_;
  std::mutex mtx_;
  std::condition_variable done_cv_;

 public:
  // total: number of arrive() calls that wait() should block for.
  explicit CompletionLatch(size_t total) : remaining_{total} {}

  // Record one completion; wakes all waiters when the count hits zero.
  void arrive() {
    const size_t left = --remaining_;
    if (left != 0) {
      return;
    }
    // Notify while holding the mutex so a waiter cannot slip between
    // checking the predicate and blocking, which would miss the wakeup.
    std::lock_guard<std::mutex> guard(mtx_);
    done_cv_.notify_all();
  }

  // Block until all expected completions have arrived.
  // Returns immediately if the count is already zero.
  void wait() {
    std::unique_lock<std::mutex> guard(mtx_);
    done_cv_.wait(guard, [this] { return remaining_.load() == 0; });
  }
};