refactor(cli): remove deprecated native hash modules

Remove obsolete native hash implementation files:
- Delete native/hash.zig (superseded by utils/hash.zig)
- Delete utils/native_bridge.zig (replaced by direct TLS)
- Delete utils/native_hash.zig (consolidated into utils/hash.zig)

Cleanup as part of CLI hardening.
This commit is contained in:
Jeremie Fraeys 2026-03-04 20:25:25 -05:00
parent 4c2af17ad6
commit 6a0555207e
No known key found for this signature in database
3 changed files with 0 additions and 420 deletions

View file

@@ -1,103 +0,0 @@
const std = @import("std");
const build_options = @import("build_options");
/// Errors produced by the native hashing wrappers in this module.
pub const HashError = error{
    ContextInitFailed, // fh_init returned null — native context could not start
    HashFailed, // native hash call returned null
    InvalidPath, // declared for API completeness; not raised in this file — TODO confirm callers
    OutOfMemory,
    NotAvailable, // native library unavailable (cross-compiling stub active)
};
// Conditionally compile C imports only when not cross-compiling.
// When cross-compiling, the native library cannot be linked, so this stub
// mirrors the dataset_hash.h surface with null/no-op implementations; every
// public function below also short-circuits on is_cross_compiling, so the
// stub bodies should never actually run.
const c = if (build_options.is_cross_compiling)
    struct {
        pub const fh_context_t = opaque {};
        pub fn fh_init(_: i32) ?*fh_context_t {
            return null; // stub: no context is ever available
        }
        pub fn fh_hash_directory_combined(_: *fh_context_t, _: [*c]const u8) [*c]u8 {
            return null; // stub: always reports failure
        }
        pub fn fh_free_string(_: [*c]u8) void {}
        pub fn fh_has_simd_sha256() i32 {
            return 0; // stub: no SIMD
        }
        pub fn fh_get_simd_impl_name() [*c]const u8 {
            return @ptrCast(@alignCast("none"));
        }
    }
else
    @cImport({
        @cInclude("dataset_hash.h");
    });
// Global context for reuse across multiple hash operations.
// Lifetime: created once by init() and never torn down (process lifetime) —
// NOTE(review): no fh_cleanup is ever called here; confirm that is intended.
var global_ctx: ?*c.fh_context_t = null;
// Double-checked-locking state: lock-free fast-path flag plus the mutex that
// serializes the one-time initialization in init().
var ctx_initialized = std.atomic.Value(bool).init(false);
var init_mutex = std.Thread.Mutex{};
/// Initialize global hash context once (thread-safe).
/// Uses double-checked locking: an atomic fast path when already
/// initialized, then a mutex + re-check to serialize the first call.
/// Errors: NotAvailable when cross-compiling; ContextInitFailed when the
/// native fh_init returns null (a later call will retry).
pub fn init() !void {
    if (build_options.is_cross_compiling) {
        return HashError.NotAvailable;
    }
    // Fast path: already initialized, no lock needed.
    if (ctx_initialized.load(.seq_cst)) return;
    init_mutex.lock();
    defer init_mutex.unlock();
    if (ctx_initialized.load(.seq_cst)) return; // Double-check under the lock
    const start = std.time.milliTimestamp();
    global_ctx = c.fh_init(0); // 0 = auto-detect threads
    const elapsed = std.time.milliTimestamp() - start;
    if (global_ctx == null) {
        // Leave ctx_initialized false so a subsequent call can retry.
        return HashError.ContextInitFailed;
    }
    ctx_initialized.store(true, .seq_cst);
    std.log.info("[native] hash context initialized: {}ms", .{elapsed});
}
/// Hash a directory using the native library (reuses the global context).
/// Returns the hex-encoded SHA256 hash string; caller owns the returned
/// slice and must free it with `allocator`.
pub fn hashDirectory(allocator: std.mem.Allocator, path: []const u8) ![]const u8 {
    if (build_options.is_cross_compiling) return HashError.NotAvailable;
    // Lazy, idempotent initialization; on success global_ctx is non-null.
    try init();
    // Native side expects a NUL-terminated path.
    const path_z = try allocator.dupeZ(u8, path);
    defer allocator.free(path_z);
    const raw = c.fh_hash_directory_combined(global_ctx.?, path_z);
    if (raw == null) return HashError.HashFailed;
    defer c.fh_free_string(raw);
    // Copy the C string into allocator-owned memory before it is freed.
    return try allocator.dupe(u8, std.mem.span(raw));
}
/// Reports whether the native library has a SIMD SHA256 implementation.
/// Always false when cross-compiling (native library unavailable).
pub fn hasSimdSha256() bool {
    return if (build_options.is_cross_compiling)
        false
    else
        c.fh_has_simd_sha256() == 1;
}
/// Get the name of the SIMD implementation being used.
/// Returns "none" when cross-compiling; otherwise a static string owned by
/// the native library (do not free).
pub fn getSimdImplName() []const u8 {
    if (build_options.is_cross_compiling) return "none";
    return std.mem.span(c.fh_get_simd_impl_name());
}

View file

@@ -1,122 +0,0 @@
//! Native library bridge for high-performance operations
//!
//! Provides Zig bindings to the native/ C++ libraries:
//! - dataset_hash: SIMD-accelerated SHA256 hashing
//! - queue_index: High-performance task queue
//!
//! The native libraries provide:
//! - 78% syscall reduction for hashing
//! - 21,000x faster queue operations
//! - Hardware acceleration (SHA-NI, ARMv8 crypto)
const std = @import("std");
// Link against native dataset_hash library
const c = @cImport({
@cInclude("dataset_hash.h");
});
/// Opaque handle for native hash context (wraps the C-side fh_context_t).
pub const HashContext = opaque {};
/// Initialize hash context with thread pool.
/// num_threads: 0 = auto-detect (capped at 8)
/// Returns null if the native library fails to initialize; release the
/// returned context with cleanupHashContext.
pub fn initHashContext(num_threads: u32) ?*HashContext {
    return @ptrCast(c.fh_init(num_threads));
}
/// Release a hash context created by initHashContext.
/// Passing null is a safe no-op.
pub fn cleanupHashContext(ctx: ?*HashContext) void {
    const handle = ctx orelse return;
    c.fh_cleanup(@ptrCast(handle));
}
/// Hash a single file using the native SIMD implementation.
/// Returns a hex string allocated with the C allocator; caller must free it
/// with freeString.
pub fn hashFile(ctx: ?*HashContext, path: []const u8) ![]const u8 {
    const gpa = std.heap.c_allocator;
    // Native side needs a NUL-terminated path.
    const path_z = try gpa.dupeZ(u8, path);
    defer gpa.free(path_z);
    const raw = c.fh_hash_file(@ptrCast(ctx), path_z.ptr);
    if (raw == null) return error.HashFailed;
    defer c.fh_free_string(raw);
    // Copy into Zig-owned memory before the native buffer is released.
    return try gpa.dupe(u8, std.mem.span(raw));
}
/// Hash an entire directory (parallel, single combined result).
/// Returns a hex string allocated with the C allocator; caller must free it
/// with freeString.
pub fn hashDirectory(ctx: ?*HashContext, path: []const u8) ![]const u8 {
    const gpa = std.heap.c_allocator;
    const path_z = try gpa.dupeZ(u8, path);
    defer gpa.free(path_z);
    const raw = c.fh_hash_directory(@ptrCast(ctx), path_z.ptr);
    if (raw == null) return error.HashFailed;
    defer c.fh_free_string(raw);
    return try gpa.dupe(u8, std.mem.span(raw));
}
/// Free a string returned by hashFile/hashDirectory.
/// Note: those functions return *copies* made with the C allocator (the raw
/// native buffer is already released internally via fh_free_string), so this
/// frees the Zig-side copy — it must not be called on raw native pointers.
pub fn freeString(str: []const u8) void {
    std.heap.c_allocator.free(str);
}
/// Hash in-memory data using the native library (convenience function).
/// Writes the data to a temporary file and hashes that file.
/// Returns the first 64 hex *characters* of the hash string (not decoded
/// bytes); caller needs no cleanup — the result is a value type.
pub fn hashData(data: []const u8) ![64]u8 {
    const gpa = std.heap.c_allocator;
    // Fix: the old fixed "/tmp/fetchml_hash_tmp" name raced between
    // concurrent callers/processes; a timestamp suffix makes collisions
    // vanishingly unlikely.
    const tmp_path = try std.fmt.allocPrint(
        gpa,
        "/tmp/fetchml_hash_tmp_{d}",
        .{std.time.nanoTimestamp()},
    );
    defer gpa.free(tmp_path);
    try std.fs.cwd().writeFile(.{
        .sub_path = tmp_path,
        .data = data,
    });
    // Best-effort cleanup of the temp file.
    defer std.fs.cwd().deleteFile(tmp_path) catch {};
    const ctx = initHashContext(0) orelse return error.InitFailed;
    defer cleanupHashContext(ctx);
    const hash_str = try hashFile(ctx, tmp_path);
    defer freeString(hash_str);
    // Fix: the old unchecked `hash_str[0..64]` was out-of-bounds if the
    // native library ever returned a shorter string.
    if (hash_str.len < 64) return error.HashFailed;
    // Copy the hex characters into a fixed-size value (no hex decoding).
    var result: [64]u8 = undefined;
    @memcpy(&result, hash_str[0..64]);
    return result;
}
/// Benchmark native hashing: times `iterations` hashes of the file at
/// `path` and prints the elapsed time to stderr. Silently returns if the
/// native library cannot be initialized.
pub fn benchmark(allocator: std.mem.Allocator, path: []const u8, iterations: u32) !void {
    const ctx = initHashContext(0) orelse {
        std.debug.print("Failed to initialize native hash context\n", .{});
        return;
    };
    defer cleanupHashContext(ctx);
    var timer = try std.time.Timer.start();
    // Warm up. Fix: the original discarded the returned string with
    // `_ = try hashFile(...)`, leaking one c_allocator allocation per call.
    freeString(try hashFile(ctx, path));
    // Benchmark native
    timer.reset();
    for (0..iterations) |_| {
        const hash = try hashFile(ctx, path);
        freeString(hash);
    }
    const native_time = timer.read();
    std.debug.print("Native SIMD SHA256: {} ms for {d} iterations\n", .{
        native_time / std.time.ns_per_ms,
        iterations,
    });
    _ = allocator; // Reserved for future comparison with Zig implementation
}

View file

@@ -1,195 +0,0 @@
const std = @import("std");
const c = @cImport({
@cInclude("dataset_hash.h");
});
/// Native hash context for high-performance file hashing.
/// Wraps the C dataset_hash library. All returned strings/arrays are owned
/// by the caller and allocated with the hasher's allocator.
pub const NativeHasher = struct {
    ctx: *c.fh_context_t,
    allocator: std.mem.Allocator,

    /// Initialize native hasher with thread pool.
    /// num_threads: 0 = auto-detect (use hardware concurrency)
    /// Errors: NativeInitFailed when fh_init returns null.
    pub fn init(allocator: std.mem.Allocator, num_threads: u32) !NativeHasher {
        const ctx = c.fh_init(num_threads);
        if (ctx == null) return error.NativeInitFailed;
        return .{
            .ctx = ctx,
            .allocator = allocator,
        };
    }

    /// Cleanup native hasher and thread pool.
    pub fn deinit(self: *NativeHasher) void {
        c.fh_cleanup(self.ctx);
    }

    /// Hash a single file. Caller owns the returned hex string.
    pub fn hashFile(self: *NativeHasher, path: []const u8) ![]const u8 {
        const c_path = try self.allocator.dupeZ(u8, path);
        defer self.allocator.free(c_path);
        const result = c.fh_hash_file(self.ctx, c_path.ptr);
        if (result == null) return error.HashFailed;
        defer c.fh_free_string(result);
        return try self.allocator.dupe(u8, std.mem.span(result));
    }

    /// Batch hash multiple files (amortizes FFI overhead).
    /// Caller owns the returned array and every string in it.
    pub fn hashBatch(self: *NativeHasher, paths: []const []const u8) ![][]const u8 {
        // Convert paths to a C string array. `converted` counts initialized
        // entries so the cleanup below never frees undefined pointers.
        // (Fix: the original defer freed ALL entries even when dupeZ failed
        // part-way through, touching uninitialized memory.)
        const c_paths = try self.allocator.alloc([*c]const u8, paths.len);
        defer self.allocator.free(c_paths);
        var converted: usize = 0;
        defer {
            for (c_paths[0..converted]) |p| {
                self.allocator.free(std.mem.span(p));
            }
        }
        for (paths) |path| {
            // Must stay alive until after fh_hash_batch; freed by the defer.
            const c_path = try self.allocator.dupeZ(u8, path);
            c_paths[converted] = c_path.ptr;
            converted += 1;
        }
        // Output array filled by the native side.
        const results = try self.allocator.alloc([*c]u8, paths.len);
        defer self.allocator.free(results);
        const ret = c.fh_hash_batch(self.ctx, c_paths.ptr, @intCast(paths.len), results.ptr);
        if (ret != 0) return error.HashFailed;
        // Convert results to Zig strings. From here on the native strings in
        // `results` must be released exactly once, even on the error path.
        const hashes = self.allocator.alloc([]const u8, paths.len) catch |err| {
            for (results) |r| c.fh_free_string(r);
            return err;
        };
        var filled: usize = 0;
        errdefer {
            // Fix: only free initialized entries (the original freed the
            // whole array, including undefined slices) and release the
            // not-yet-converted native strings instead of leaking them.
            for (hashes[0..filled]) |h| self.allocator.free(h);
            for (results[filled..]) |r| c.fh_free_string(r);
            self.allocator.free(hashes);
        }
        for (results) |r| {
            hashes[filled] = try self.allocator.dupe(u8, std.mem.span(r));
            c.fh_free_string(r);
            filled += 1;
        }
        return hashes;
    }

    /// Hash entire directory (single combined hash).
    /// Caller owns the returned hex string.
    pub fn hashDirectory(self: *NativeHasher, dir_path: []const u8) ![]const u8 {
        const c_path = try self.allocator.dupeZ(u8, dir_path);
        defer self.allocator.free(c_path);
        const result = c.fh_hash_directory(self.ctx, c_path.ptr);
        if (result == null) return error.HashFailed;
        defer c.fh_free_string(result);
        return try self.allocator.dupe(u8, std.mem.span(result));
    }

    /// Hash directory with batch output (individual file hashes).
    /// Caller owns both returned arrays and all strings in them.
    pub fn hashDirectoryBatch(
        self: *NativeHasher,
        dir_path: []const u8,
        max_results: u32,
    ) !struct { hashes: [][]const u8, paths: [][]const u8, count: u32 } {
        const c_path = try self.allocator.dupeZ(u8, dir_path);
        defer self.allocator.free(c_path);
        // Output arrays filled by the native side.
        const hashes = try self.allocator.alloc([*c]u8, max_results);
        defer self.allocator.free(hashes);
        const paths = try self.allocator.alloc([*c]u8, max_results);
        defer self.allocator.free(paths);
        var count: u32 = 0;
        const ret = c.fh_hash_directory_batch(
            self.ctx,
            c_path.ptr,
            hashes.ptr,
            paths.ptr,
            max_results,
            &count,
        );
        if (ret != 0) return error.HashFailed;
        // From here the `count` native strings in hashes/paths must each be
        // released exactly once, even if an allocation below fails.
        const zig_hashes = self.allocator.alloc([]const u8, count) catch |err| {
            for (hashes[0..count], paths[0..count]) |h, p| {
                c.fh_free_string(h);
                c.fh_free_string(p);
            }
            return err;
        };
        const zig_paths = self.allocator.alloc([]const u8, count) catch |err| {
            for (hashes[0..count], paths[0..count]) |h, p| {
                c.fh_free_string(h);
                c.fh_free_string(p);
            }
            self.allocator.free(zig_hashes);
            return err;
        };
        // Fix: the original errdefers freed every element of zig_hashes /
        // zig_paths even when only some were initialized (undefined frees),
        // and leaked the remaining native strings. `filled` counts fully
        // converted pairs so cleanup is exact.
        var filled: usize = 0;
        errdefer {
            for (zig_hashes[0..filled]) |h| self.allocator.free(h);
            for (zig_paths[0..filled]) |p| self.allocator.free(p);
            for (filled..count) |i| {
                c.fh_free_string(hashes[i]);
                c.fh_free_string(paths[i]);
            }
            self.allocator.free(zig_hashes);
            self.allocator.free(zig_paths);
        }
        for (0..count) |i| {
            // Duplicate both strings before freeing either native buffer so
            // a failure leaves a consistent state for the errdefer above.
            const h_copy = try self.allocator.dupe(u8, std.mem.span(hashes[i]));
            errdefer self.allocator.free(h_copy);
            const p_copy = try self.allocator.dupe(u8, std.mem.span(paths[i]));
            zig_hashes[i] = h_copy;
            zig_paths[i] = p_copy;
            c.fh_free_string(hashes[i]);
            c.fh_free_string(paths[i]);
            filled += 1;
        }
        return .{
            .hashes = zig_hashes,
            .paths = zig_paths,
            .count = count,
        };
    }

    /// Check if SIMD SHA-256 is available in the native library.
    pub fn hasSimd(self: *NativeHasher) bool {
        _ = self;
        return c.fh_has_simd_sha256() != 0;
    }

    /// Get implementation info (SIMD type, etc.).
    /// Returned string is owned by the native library; do not free.
    pub fn getImplInfo(self: *NativeHasher) []const u8 {
        _ = self;
        return std.mem.span(c.fh_get_simd_impl_name());
    }
};
/// Convenience function: hash a directory using the native library.
/// Caller owns the returned hex string (free with `allocator`).
pub fn hashDirectoryNative(allocator: std.mem.Allocator, dir_path: []const u8) ![]const u8 {
    var native = try NativeHasher.init(allocator, 0); // 0 = auto-detect threads
    defer native.deinit();
    return native.hashDirectory(dir_path);
}
/// Convenience function: batch hash files using the native library.
/// Caller owns the returned array and every string in it.
pub fn hashFilesNative(
    allocator: std.mem.Allocator,
    paths: []const []const u8,
) ![][]const u8 {
    var native = try NativeHasher.init(allocator, 0); // auto-detect threads
    defer native.deinit();
    return native.hashBatch(paths);
}
test "NativeHasher basic operations" {
    const allocator = std.testing.allocator;
    // Skip gracefully when the native library is not linked in.
    var hasher = NativeHasher.init(allocator, 1) catch |err| {
        if (err != error.NativeInitFailed) return err;
        std.debug.print("Native library not available, skipping test\n", .{});
        return;
    };
    defer hasher.deinit();
    // Smoke-check the introspection API.
    const has_simd = hasher.hasSimd();
    const impl_name = hasher.getImplInfo();
    std.debug.print("SIMD: {any}, Impl: {s}\n", .{ has_simd, impl_name });
}