#ifndef DATASET_HASH_H
#define DATASET_HASH_H

#include <stddef.h>  // size_t
#include <stdint.h>  // uint32_t

#ifdef __cplusplus
extern "C" {
#endif

// Public C API for file and directory hashing (SHA256, hex-encoded results).
// Every string returned by this library as a non-const char* is owned by the
// caller and must be released with fh_free_string().

// Opaque handle for a hash context.
typedef struct fh_context fh_context_t;

// ---------------------------------------------------------------------------
// Lifecycle
// ---------------------------------------------------------------------------

// Initialize a hash context with a thread pool.
//   num_threads: 0 = auto-detect (use number of CPU cores, capped at 8)
// Returns: the new context; release it with fh_cleanup.
fh_context_t* fh_init(uint32_t num_threads);

// Clean up a context created by fh_init.
void fh_cleanup(fh_context_t* ctx);

// ---------------------------------------------------------------------------
// Single-result hashing
// ---------------------------------------------------------------------------

// Hash a single file (mmap + SIMD SHA256).
// Returns: hex string (caller frees with fh_free_string)
// Note: for batch operations, use fh_hash_directory_batch to amortize CGo
// call overhead.
char* fh_hash_file(fh_context_t* ctx, const char* path);

// Hash a directory's contents recursively and deterministically.
//
// The hash is computed over:
//   - All regular files (S_ISREG) in the directory tree
//   - Recursively traverses subdirectories (max depth 32)
//   - Sorted lexicographically by full path for reproducibility
//   - Excludes hidden files (names starting with '.')
//   - Excludes symlinks, devices, and special files
//
// The combined hash is SHA256(SHA256(file1) + SHA256(file2) + ...)
// where files are processed in lexicographically sorted order.
//
// Returns: hex string (caller frees with fh_free_string), or NULL on error
char* fh_hash_directory(fh_context_t* ctx, const char* path);

// Simple combined directory hash (single CGo call, single result).
// Best for: quick directory hash verification.
// Returns: hex string (caller frees with fh_free_string)
char* fh_hash_directory_combined(fh_context_t* ctx, const char* dir_path);

// ---------------------------------------------------------------------------
// Batch hashing
// ---------------------------------------------------------------------------

// Batch hash multiple files (single CGo call for the entire batch).
//   paths:      array of file paths
//   count:      number of paths
//   out_hashes: pre-allocated array of 65-char buffers
//               (64 hex characters + null terminator)
// Returns: 0 on success, -1 on error
int fh_hash_batch(fh_context_t* ctx, const char** paths, uint32_t count, char** out_hashes);

// Hash a directory with batch output (get individual file hashes).
//   out_hashes:  pre-allocated array of 65-char buffers
//   out_paths:   optional array of path buffers (can be NULL)
//                NOTE(review): the required size of each path buffer is not
//                stated in this header - confirm against the implementation.
//   max_results: size of the output arrays
//   out_count:   receives the actual number of files hashed
// Returns: 0 on success, -1 on error
int fh_hash_directory_batch(
    fh_context_t* ctx,
    const char* dir_path,
    char** out_hashes,
    char** out_paths,
    uint32_t max_results,
    uint32_t* out_count
);

// ---------------------------------------------------------------------------
// Utilities
// ---------------------------------------------------------------------------

// Free a string returned by this library.
void fh_free_string(char* str);

// Constant-time hash comparison (prevents timing attacks).
// Returns: 1 if the hashes are equal, 0 if not equal
// Timing is independent of the content (constant-time).
int fh_hashes_equal(const char* hash_a, const char* hash_b);

// ---------------------------------------------------------------------------
// Error handling
// ---------------------------------------------------------------------------

// Get the last error recorded on the context. The result is const and, unlike
// the hash strings, is not documented as something the caller frees.
const char* fh_last_error(fh_context_t* ctx);

// Clear the error recorded on the context.
void fh_clear_error(fh_context_t* ctx);

// ---------------------------------------------------------------------------
// Configuration
// ---------------------------------------------------------------------------

// Set / get the buffer size configured on the context.
// NOTE(review): units are presumably bytes - confirm against the implementation.
void fh_set_buffer_size(fh_context_t* ctx, size_t buffer_size);
size_t fh_get_buffer_size(fh_context_t* ctx);

// ---------------------------------------------------------------------------
// SIMD detection
// ---------------------------------------------------------------------------

// Returns 1 if SIMD SHA256 is available, 0 otherwise.
int fh_has_simd_sha256(void);

// Returns "SHA-NI", "ARMv8", or "generic".
const char* fh_get_simd_impl_name(void);

#ifdef __cplusplus
}
#endif

#endif // DATASET_HASH_H