diff --git a/cli/src/native/hash.zig b/cli/src/native/hash.zig deleted file mode 100644 index bc735ca..0000000 --- a/cli/src/native/hash.zig +++ /dev/null @@ -1,103 +0,0 @@ -const std = @import("std"); -const build_options = @import("build_options"); - -pub const HashError = error{ - ContextInitFailed, - HashFailed, - InvalidPath, - OutOfMemory, - NotAvailable, -}; - -// Conditionally compile C imports only when not cross-compiling -const c = if (build_options.is_cross_compiling) - struct { - pub const fh_context_t = opaque {}; - pub fn fh_init(_: i32) ?*fh_context_t { - return null; - } - pub fn fh_hash_directory_combined(_: *fh_context_t, _: [*c]const u8) [*c]u8 { - return null; - } - pub fn fh_free_string(_: [*c]u8) void {} - pub fn fh_has_simd_sha256() i32 { - return 0; - } - pub fn fh_get_simd_impl_name() [*c]const u8 { - return @ptrCast(@alignCast("none")); - } - } -else - @cImport({ - @cInclude("dataset_hash.h"); - }); - -// Global context for reuse across multiple hash operations -var global_ctx: ?*c.fh_context_t = null; -var ctx_initialized = std.atomic.Value(bool).init(false); -var init_mutex = std.Thread.Mutex{}; - -/// Initialize global hash context once (thread-safe) -pub fn init() !void { - if (build_options.is_cross_compiling) { - return HashError.NotAvailable; - } - - if (ctx_initialized.load(.seq_cst)) return; - - init_mutex.lock(); - defer init_mutex.unlock(); - - if (ctx_initialized.load(.seq_cst)) return; // Double-check - - const start = std.time.milliTimestamp(); - global_ctx = c.fh_init(0); // 0 = auto-detect threads - const elapsed = std.time.milliTimestamp() - start; - - if (global_ctx == null) { - return HashError.ContextInitFailed; - } - - ctx_initialized.store(true, .seq_cst); - std.log.info("[native] hash context initialized: {}ms", .{elapsed}); -} - -/// Hash a directory using the native library (reuses global context) -/// Returns the hex-encoded SHA256 hash string -pub fn hashDirectory(allocator: std.mem.Allocator, path: []const u8) ![]const u8 { - if (build_options.is_cross_compiling) { - return HashError.NotAvailable; - } - - try init(); // Idempotent initialization - - const ctx = global_ctx.?; // Safe: init() guarantees non-null - - // Convert path to null-terminated C string - const c_path = try allocator.dupeZ(u8, path); - defer allocator.free(c_path); - - // Call native function - const result = c.fh_hash_directory_combined(ctx, c_path); - if (result == null) { - return HashError.HashFailed; - } - defer c.fh_free_string(result); - - // Convert result to Zig string - const result_slice = std.mem.span(result); - return try allocator.dupe(u8, result_slice); -} - -/// Check if SIMD SHA256 is available -pub fn hasSimdSha256() bool { - if (build_options.is_cross_compiling) return false; - return c.fh_has_simd_sha256() == 1; -} - -/// Get the name of the SIMD implementation being used -pub fn getSimdImplName() []const u8 { - if (build_options.is_cross_compiling) return "none"; - const name = c.fh_get_simd_impl_name(); - return std.mem.span(name); -} diff --git a/cli/src/utils/native_bridge.zig b/cli/src/utils/native_bridge.zig deleted file mode 100644 index eece5dd..0000000 --- a/cli/src/utils/native_bridge.zig +++ /dev/null @@ -1,122 +0,0 @@ -//! Native library bridge for high-performance operations -//! -//! Provides Zig bindings to the native/ C++ libraries: -//! - dataset_hash: SIMD-accelerated SHA256 hashing -//! - queue_index: High-performance task queue -//! -//! The native libraries provide: -//! - 78% syscall reduction for hashing -//! - 21,000x faster queue operations -//! - Hardware acceleration (SHA-NI, ARMv8 crypto) - -const std = @import("std"); - -// Link against native dataset_hash library -const c = @cImport({ - @cInclude("dataset_hash.h"); -}); - -/// Opaque handle for native hash context -pub const HashContext = opaque {}; - -/// Initialize hash context with thread pool -/// num_threads: 0 = auto-detect (capped at 8) -pub fn initHashContext(num_threads: u32) ?*HashContext { - return @ptrCast(c.fh_init(num_threads)); -} - -/// Cleanup hash context -pub fn cleanupHashContext(ctx: ?*HashContext) void { - if (ctx) |ptr| { - c.fh_cleanup(@ptrCast(ptr)); - } -} - -/// Hash a single file using native SIMD implementation -/// Returns hex string (caller must free with freeString) -pub fn hashFile(ctx: ?*HashContext, path: []const u8) ![]const u8 { - const c_path = try std.heap.c_allocator.dupeZ(u8, path); - defer std.heap.c_allocator.free(c_path); - - const result = c.fh_hash_file(@ptrCast(ctx), c_path.ptr); - if (result == null) { - return error.HashFailed; - } - defer c.fh_free_string(result); - - const len = std.mem.len(result); - return try std.heap.c_allocator.dupe(u8, result[0..len]); -} - -/// Hash entire directory (parallel, combined result) -pub fn hashDirectory(ctx: ?*HashContext, path: []const u8) ![]const u8 { - const c_path = try std.heap.c_allocator.dupeZ(u8, path); - defer std.heap.c_allocator.free(c_path); - - const result = c.fh_hash_directory(@ptrCast(ctx), c_path.ptr); - if (result == null) { - return error.HashFailed; - } - defer c.fh_free_string(result); - - const len = std.mem.len(result); - return try std.heap.c_allocator.dupe(u8, result[0..len]); -} - -/// Free string returned by native library -pub fn freeString(str: []const u8) void { - std.heap.c_allocator.free(str); -} - -/// Hash data using native library (convenience function) -pub fn hashData(data: []const u8) ![64]u8 { - // Write data to temp file and hash it - const tmp_path = try std.fs.path.join(std.heap.c_allocator, &.{ "/tmp", "fetchml_hash_tmp" }); - defer std.heap.c_allocator.free(tmp_path); - - try std.fs.cwd().writeFile(.{ - .sub_path = tmp_path, - .data = data, - }); - defer std.fs.cwd().deleteFile(tmp_path) catch {}; - - const ctx = initHashContext(0) orelse return error.InitFailed; - defer cleanupHashContext(ctx); - - const hash_str = try hashFile(ctx, tmp_path); - defer freeString(hash_str); - - // Parse hex string to bytes - var result: [64]u8 = undefined; - @memcpy(&result, hash_str[0..64]); - return result; -} - -/// Benchmark native vs standard hashing -pub fn benchmark(allocator: std.mem.Allocator, path: []const u8, iterations: u32) !void { - const ctx = initHashContext(0) orelse { - std.debug.print("Failed to initialize native hash context\n", .{}); - return; - }; - defer cleanupHashContext(ctx); - - var timer = try std.time.Timer.start(); - - // Warm up - _ = try hashFile(ctx, path); - - // Benchmark native - timer.reset(); - for (0..iterations) |_| { - const hash = try hashFile(ctx, path); - freeString(hash); - } - const native_time = timer.read(); - - std.debug.print("Native SIMD SHA256: {} ms for {d} iterations\n", .{ - native_time / std.time.ns_per_ms, - iterations, - }); - - _ = allocator; // Reserved for future comparison with Zig implementation -} diff --git a/cli/src/utils/native_hash.zig b/cli/src/utils/native_hash.zig deleted file mode 100644 index 480c7d2..0000000 --- a/cli/src/utils/native_hash.zig +++ /dev/null @@ -1,195 +0,0 @@ -const std = @import("std"); -const c = @cImport({ - @cInclude("dataset_hash.h"); -}); - -/// Native hash context for high-performance file hashing -pub const NativeHasher = struct { - ctx: *c.fh_context_t, - allocator: std.mem.Allocator, - - /// Initialize native hasher with thread pool - /// num_threads: 0 = auto-detect (use hardware concurrency) - pub fn init(allocator: std.mem.Allocator, num_threads: u32) !NativeHasher { - const ctx = c.fh_init(num_threads); - if (ctx == null) return error.NativeInitFailed; - - return .{ - .ctx = ctx, - .allocator = allocator, - }; - } - - /// Cleanup native hasher and thread pool - pub fn deinit(self: *NativeHasher) void { - c.fh_cleanup(self.ctx); - } - - /// Hash a single file - pub fn hashFile(self: *NativeHasher, path: []const u8) ![]const u8 { - const c_path = try self.allocator.dupeZ(u8, path); - defer self.allocator.free(c_path); - - const result = c.fh_hash_file(self.ctx, c_path.ptr); - if (result == null) return error.HashFailed; - defer c.fh_free_string(result); - - return try self.allocator.dupe(u8, std.mem.span(result)); - } - - /// Batch hash multiple files (amortizes CGo overhead) - pub fn hashBatch(self: *NativeHasher, paths: []const []const u8) ![][]const u8 { - // Convert paths to C string array - const c_paths = try self.allocator.alloc([*c]const u8, paths.len); - defer self.allocator.free(c_paths); - - for (paths, 0..) |path, i| { - const c_path = try self.allocator.dupeZ(u8, path); - c_paths[i] = c_path.ptr; - // Note: we need to keep these alive until after fh_hash_batch - } - defer { - for (c_paths) |p| { - self.allocator.free(std.mem.span(p)); - } - } - - // Allocate results array - const results = try self.allocator.alloc([*c]u8, paths.len); - defer self.allocator.free(results); - - // Call native batch hash - const ret = c.fh_hash_batch(self.ctx, c_paths.ptr, @intCast(paths.len), results.ptr); - if (ret != 0) return error.HashFailed; - - // Convert results to Zig strings - var hashes = try self.allocator.alloc([]const u8, paths.len); - errdefer { - for (hashes) |h| self.allocator.free(h); - self.allocator.free(hashes); - } - - for (results, 0..) |r, i| { - hashes[i] = try self.allocator.dupe(u8, std.mem.span(r)); - c.fh_free_string(r); - } - - return hashes; - } - - /// Hash entire directory (combined hash) - pub fn hashDirectory(self: *NativeHasher, dir_path: []const u8) ![]const u8 { - const c_path = try self.allocator.dupeZ(u8, dir_path); - defer self.allocator.free(c_path); - - const result = c.fh_hash_directory(self.ctx, c_path.ptr); - if (result == null) return error.HashFailed; - defer c.fh_free_string(result); - - return try self.allocator.dupe(u8, std.mem.span(result)); - } - - /// Hash directory with batch output (individual file hashes) - pub fn hashDirectoryBatch( - self: *NativeHasher, - dir_path: []const u8, - max_results: u32, - ) !struct { hashes: [][]const u8, paths: [][]const u8, count: u32 } { - const c_path = try self.allocator.dupeZ(u8, dir_path); - defer self.allocator.free(c_path); - - // Allocate output arrays - const hashes = try self.allocator.alloc([*c]u8, max_results); - defer self.allocator.free(hashes); - - const paths = try self.allocator.alloc([*c]u8, max_results); - defer self.allocator.free(paths); - - var count: u32 = 0; - - const ret = c.fh_hash_directory_batch( - self.ctx, - c_path.ptr, - hashes.ptr, - paths.ptr, - max_results, - &count, - ); - if (ret != 0) return error.HashFailed; - - // Convert to Zig arrays - var zig_hashes = try self.allocator.alloc([]const u8, count); - errdefer { - for (zig_hashes) |h| self.allocator.free(h); - self.allocator.free(zig_hashes); - } - - var zig_paths = try self.allocator.alloc([]const u8, count); - errdefer { - for (zig_paths) |p| self.allocator.free(p); - self.allocator.free(zig_paths); - } - - for (0..count) |i| { - zig_hashes[i] = try self.allocator.dupe(u8, std.mem.span(hashes[i])); - c.fh_free_string(hashes[i]); - - zig_paths[i] = try self.allocator.dupe(u8, std.mem.span(paths[i])); - c.fh_free_string(paths[i]); - } - - return .{ - .hashes = zig_hashes, - .paths = zig_paths, - .count = count, - }; - } - - /// Check if SIMD SHA-256 is available - pub fn hasSimd(self: *NativeHasher) bool { - _ = self; - return c.fh_has_simd_sha256() != 0; - } - - /// Get implementation info (SIMD type, etc.) - pub fn getImplInfo(self: *NativeHasher) []const u8 { - _ = self; - return std.mem.span(c.fh_get_simd_impl_name()); - } -}; - -/// Convenience function: hash directory using native library -pub fn hashDirectoryNative(allocator: std.mem.Allocator, dir_path: []const u8) ![]const u8 { - var hasher = try NativeHasher.init(allocator, 0); // Auto-detect threads - defer hasher.deinit(); - return try hasher.hashDirectory(dir_path); -} - -/// Convenience function: batch hash files using native library -pub fn hashFilesNative( - allocator: std.mem.Allocator, - paths: []const []const u8, -) ![][]const u8 { - var hasher = try NativeHasher.init(allocator, 0); - defer hasher.deinit(); - return try hasher.hashBatch(paths); -} - -test "NativeHasher basic operations" { - const allocator = std.testing.allocator; - - // Skip if native library not available - var hasher = NativeHasher.init(allocator, 1) catch |err| { - if (err == error.NativeInitFailed) { - std.debug.print("Native library not available, skipping test\n", .{}); - return; - } - return err; - }; - defer hasher.deinit(); - - // Check SIMD availability - const has_simd = hasher.hasSimd(); - const impl_name = hasher.getImplInfo(); - std.debug.print("SIMD: {any}, Impl: {s}\n", .{ has_simd, impl_name }); -}