// dataset_hash.cpp - C API implementation using C-style internals #include "dataset_hash.h" #include "crypto/sha256_hasher.h" #include "io/file_hash.h" #include "threading/parallel_hash.h" #include "../common/include/secure_mem.h" #include #include using fetchml::common::safe_strncpy; // Context structure - simple C-style struct fh_context { ParallelHasher hasher; size_t buffer_size; char last_error[256]; }; fh_context_t* fh_init(uint32_t num_threads) { auto* ctx = (fh_context_t*)malloc(sizeof(fh_context_t)); if (!ctx) return nullptr; ctx->buffer_size = 64 * 1024; ctx->last_error[0] = '\0'; if (!parallel_hasher_init(&ctx->hasher, num_threads, ctx->buffer_size)) { free(ctx); return nullptr; } return ctx; } void fh_cleanup(fh_context_t* ctx) { if (ctx) { parallel_hasher_cleanup(&ctx->hasher); free(ctx); } } char* fh_hash_file(fh_context_t* ctx, const char* path) { if (!ctx || !path) return nullptr; char hash[65]; if (hash_file(path, ctx->buffer_size, hash) != 0) { safe_strncpy(ctx->last_error, "Failed to hash file", sizeof(ctx->last_error)); return nullptr; } char* result = (char*)malloc(65); if (result) { memcpy(result, hash, 65); } return result; } char* fh_hash_directory(fh_context_t* ctx, const char* path) { if (!ctx || !path) return nullptr; char* result = (char*)malloc(65); if (!result) return nullptr; if (parallel_hash_directory(&ctx->hasher, path, result) != 0) { free(result); safe_strncpy(ctx->last_error, "Failed to hash directory", sizeof(ctx->last_error)); return nullptr; } return result; } int fh_hash_batch(fh_context_t* ctx, const char** paths, uint32_t count, char** out_hashes) { if (!ctx || !paths || !out_hashes || count == 0) return -1; return hash_files_batch(paths, count, out_hashes, ctx->buffer_size); } int fh_hash_directory_batch( fh_context_t* ctx, const char* dir_path, char** out_hashes, char** out_paths, uint32_t max_results, uint32_t* out_count) { if (!ctx || !dir_path || !out_hashes) return -1; return parallel_hash_directory_batch(&ctx->hasher, dir_path, out_hashes, out_paths, max_results, out_count); } char* fh_hash_directory_combined(fh_context_t* ctx, const char* dir_path) { return fh_hash_directory(ctx, dir_path); } void fh_free_string(char* str) { free(str); } const char* fh_last_error(fh_context_t* ctx) { if (!ctx || !ctx->last_error[0]) return nullptr; return ctx->last_error; } void fh_clear_error(fh_context_t* ctx) { if (ctx) { ctx->last_error[0] = '\0'; } } void fh_set_buffer_size(fh_context_t* ctx, size_t buffer_size) { if (ctx) { ctx->buffer_size = buffer_size; } } size_t fh_get_buffer_size(fh_context_t* ctx) { return ctx ? ctx->buffer_size : 0; } int fh_has_simd_sha256(void) { return sha256_has_hardware_accel(); } const char* fh_get_simd_impl_name(void) { return sha256_impl_name(); } // Constant-time hash comparison int fh_hashes_equal(const char* hash_a, const char* hash_b) { if (!hash_a || !hash_b) return 0; // SHA256 hex strings are always 64 characters return fetchml::common::secure_memcmp(hash_a, hash_b, 64) == 0 ? 1 : 0; }