fetch_ml/native/dataset_hash/dataset_hash.h
Jeremie Fraeys d408a60eb1
Some checks failed
Documentation / build-and-publish (push) Waiting to run
Test / test (push) Waiting to run
Checkout test / test (push) Successful in 5s
CI with Native Libraries / test-native (push) Has been cancelled
CI with Native Libraries / build-release (push) Has been cancelled
ci: push all workflow updates
2026-02-12 13:28:15 -05:00

77 lines
2.4 KiB
C

#ifndef DATASET_HASH_H
#define DATASET_HASH_H
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// Opaque handle for a hash context.
// The struct is only forward-declared in this header, so callers can hold
// and pass fh_context_t pointers but cannot access any fields directly.
// A context is obtained from fh_init() and passed to the other fh_* calls
// that take a ctx argument.
typedef struct fh_context fh_context_t;
// Initialize a hash context with a thread pool.
// num_threads: number of worker threads to use;
//              0 = auto-detect (use number of CPU cores, capped at 8)
// Returns: pointer to the new context; pass it to fh_cleanup() when done.
// NOTE(review): the value returned on failure is not stated in this header
// (presumably NULL) - confirm against the implementation.
fh_context_t* fh_init(uint32_t num_threads);
// Clean up a hash context.
// ctx: context previously returned by fh_init()
// NOTE(review): whether a NULL ctx is accepted, and whether ctx may be used
// after this call, is not documented here - presumably it must not be used
// again; confirm against the implementation.
void fh_cleanup(fh_context_t* ctx);
// Hash a single file (mmap + SIMD SHA256).
// ctx:  hash context
// path: path of the file to hash
// Returns: hex string (caller frees with fh_free_string)
// NOTE(review): the return value on error is not stated in this header
// (presumably NULL, with details via fh_last_error) - confirm.
// Note: For batch operations, use fh_hash_directory_batch to amortize CGo overhead
char* fh_hash_file(fh_context_t* ctx, const char* path);
// Hash an entire directory (parallel file hashing with a combined result).
// Uses the worker pool internally and returns a single combined hash.
// ctx:  hash context
// path: path of the directory to hash
// Returns: hex string (caller frees with fh_free_string)
// NOTE(review): the return value on error, and how per-file hashes are
// combined (ordering, recursion into subdirectories), are not stated in
// this header - confirm against the implementation.
char* fh_hash_directory(fh_context_t* ctx, const char* path);
// Batch hash multiple files (single CGo call for the entire batch).
// ctx:        hash context
// paths:      array of file paths
// count:      number of paths
// out_hashes: pre-allocated array of 65-char buffers (64 hex + null terminator);
//             presumably one buffer per entry in paths, i.e. `count`
//             buffers - TODO confirm
// Returns: 0 on success, -1 on error
// NOTE(review): the state of out_hashes after a -1 return (e.g. partially
// filled) is not documented here - confirm before reading it on error.
int fh_hash_batch(fh_context_t* ctx, const char** paths, uint32_t count, char** out_hashes);
// Hash a directory with batch output (get individual file hashes).
// ctx:         hash context
// dir_path:    path of the directory to hash
// out_hashes:  pre-allocated array of 65-char buffers
// out_paths:   optional array of path buffers (can be NULL)
//              NOTE(review): the required capacity of each path buffer is
//              not stated in this header - confirm against the
//              implementation before sizing these.
// max_results: size of output arrays
// out_count:   actual number of files hashed
// Returns: 0 on success, -1 on error
// NOTE(review): behavior when the directory contains more than max_results
// files (truncate vs. error) is not documented here - confirm.
int fh_hash_directory_batch(
fh_context_t* ctx,
const char* dir_path,
char** out_hashes,
char** out_paths,
uint32_t max_results,
uint32_t* out_count
);
// Simple combined hash (single CGo call, single result).
// Best for: quick directory hash verification
// ctx:      hash context
// dir_path: path of the directory to hash
// Returns: hex string (caller frees with fh_free_string)
// NOTE(review): how this differs from fh_hash_directory (which is also
// documented as returning a single combined hash) is not stated in this
// header, nor is the error return value - confirm against the implementation.
char* fh_hash_directory_combined(fh_context_t* ctx, const char* dir_path);
// Free a string returned by the library.
// str: a hex string returned by fh_hash_file, fh_hash_directory or
//      fh_hash_directory_combined (the functions documented as
//      "caller frees with fh_free_string")
// NOTE(review): the const char* results of fh_last_error and
// fh_get_simd_impl_name are not documented as caller-owned; presumably they
// must not be passed here - confirm.
void fh_free_string(char* str);
// Error handling
// Both functions take the context, so error state appears to be tracked
// per context rather than globally.
// fh_last_error:  presumably returns a message describing the most recent
//                 error recorded on ctx - TODO confirm, including what is
//                 returned when no error is set and how long the returned
//                 pointer stays valid.
// fh_clear_error: presumably resets that recorded error - TODO confirm.
const char* fh_last_error(fh_context_t* ctx);
void fh_clear_error(fh_context_t* ctx);
// Configuration
// Setter/getter pair for a per-context buffer size.
// NOTE(review): this header does not say what the buffer is used for, what
// unit buffer_size is in (presumably bytes), what the default is, or which
// values are valid - confirm against the implementation.
void fh_set_buffer_size(fh_context_t* ctx, size_t buffer_size);
size_t fh_get_buffer_size(fh_context_t* ctx);
// SIMD detection
// Neither function takes a context argument, so they can be called without
// a context from fh_init().
// NOTE(review): the string from fh_get_simd_impl_name is returned as
// const char* and is not documented as caller-owned; presumably it must not
// be freed - confirm.
int fh_has_simd_sha256(void); // Returns 1 if SIMD SHA256 available, 0 otherwise
const char* fh_get_simd_impl_name(void); // Returns "SHA-NI", "ARMv8", or "generic"
#ifdef __cplusplus
}
#endif
#endif // DATASET_HASH_H