fetch_ml/native/dataset_hash/threading/parallel_hash.h
Jeremie Fraeys 43d241c28d
feat: implement C++ native libraries for performance-critical operations
- Add arena allocator for zero-allocation hot paths
- Add thread pool for parallel operations
- Add mmap utilities for memory-mapped I/O
- Implement queue_index with heap-based priority queue
- Implement dataset_hash with SIMD support (SHA-NI, ARMv8)
- Add runtime SIMD detection for cross-platform correctness
- Add comprehensive tests and benchmarks
2026-02-16 20:38:04 -05:00

34 lines
994 B
C

#pragma once
#include "../io/file_hash.h"
#include "../../common/include/thread_pool.h"
#include <stddef.h>
#include <stdint.h>
// Parallel directory hashing with combined result.
// State object shared by the parallel_hasher_* / parallel_hash_* functions declared in this header:
// set up with parallel_hasher_init() and released with parallel_hasher_cleanup().
struct ParallelHasher {
// Thread pool handle. Presumably the workers that run the per-file hashing — confirm in the implementation.
ThreadPool* pool;
// Buffer size in bytes. Presumably the value given to parallel_hasher_init() and used as the
// read buffer size when hashing files — TODO confirm.
size_t buffer_size;
};
// Initialize a parallel hasher.
// hasher: hasher object to initialize.
// num_threads: requested number of threads (presumably sizes hasher->pool — confirm in the implementation).
// buffer_size: buffer size in bytes (presumably stored in hasher->buffer_size — confirm).
// Returns false (0) on error.
// NOTE(review): the return type is int, not bool; the success value is presumably non-zero —
// verify against the implementation before comparing to a specific value.
int parallel_hasher_init(ParallelHasher* hasher, uint32_t num_threads, size_t buffer_size);
// Clean up a hasher that was set up with parallel_hasher_init().
// NOTE(review): presumably releases hasher->pool; whether this is safe to call on a hasher
// whose initialization failed, or more than once, is not visible from this header — confirm.
void parallel_hasher_cleanup(ParallelHasher* hasher);
// Hash the directory at `path` and write the combined hash to out_hash.
// hasher: hasher object (see parallel_hasher_init()).
// path: directory to hash.
// out_hash: caller-provided buffer of at least 65 bytes
//           (presumably 64 hex characters plus a terminating NUL — confirm).
// Returns an int status; the convention is not documented here — presumably the same as
// parallel_hasher_init() (0 on error). Verify against the implementation.
int parallel_hash_directory(ParallelHasher* hasher, const char* path, char* out_hash);
// Hash the directory at `path`, reporting individual per-file results.
// hasher: hasher object (see parallel_hasher_init()).
// path: directory to hash.
// out_hashes: pre-allocated array of 65-char buffers, one per file result.
// out_paths: optional array of path buffers (presumably may be NULL to skip path output;
//            the required size of each buffer is not stated here — confirm).
// max_results: presumably the capacity (entry count) of out_hashes and out_paths — TODO confirm.
// out_count: receives the actual number of files hashed.
// Returns an int status; the convention is not documented here — verify against the implementation.
int parallel_hash_directory_batch(
ParallelHasher* hasher,
const char* path,
char** out_hashes,
char** out_paths,
uint32_t max_results,
uint32_t* out_count
);