refactor(cli): remove deprecated native hash modules

Remove obsolete native hash implementation files:
- Delete native/hash.zig (superseded by utils/hash.zig)
- Delete utils/native_bridge.zig (replaced by direct TLS)
- Delete utils/native_hash.zig (consolidated into utils/hash.zig)

Cleanup as part of CLI hardening.
This commit is contained in:
Jeremie Fraeys 2026-03-04 20:25:25 -05:00
parent 4c2af17ad6
commit 6a0555207e
No known key found for this signature in database
3 changed files with 0 additions and 420 deletions

View file

@@ -1,103 +0,0 @@
const std = @import("std");
const build_options = @import("build_options");
/// Errors produced by the native hashing wrappers in this module.
pub const HashError = error{
    ContextInitFailed, // fh_init returned null — native context could not start
    HashFailed, // native hash call returned null
    InvalidPath, // declared for API completeness; not raised in this file — TODO confirm callers
    OutOfMemory,
    NotAvailable, // native library unavailable (cross-compiling stub active)
};
// Conditionally compile C imports only when not cross-compiling.
// When cross-compiling, the native library cannot be linked, so this stub
// mirrors the dataset_hash.h surface with null/no-op implementations; every
// public function below also short-circuits on is_cross_compiling, so the
// stub bodies should never actually run.
const c = if (build_options.is_cross_compiling)
    struct {
        pub const fh_context_t = opaque {};
        pub fn fh_init(_: i32) ?*fh_context_t {
            return null; // stub: no context is ever available
        }
        pub fn fh_hash_directory_combined(_: *fh_context_t, _: [*c]const u8) [*c]u8 {
            return null; // stub: always reports failure
        }
        pub fn fh_free_string(_: [*c]u8) void {}
        pub fn fh_has_simd_sha256() i32 {
            return 0; // stub: no SIMD
        }
        pub fn fh_get_simd_impl_name() [*c]const u8 {
            return @ptrCast(@alignCast("none"));
        }
    }
else
    @cImport({
        @cInclude("dataset_hash.h");
    });
// Global context for reuse across multiple hash operations.
// Lifetime: created once by init() and never torn down (process lifetime) —
// NOTE(review): no fh_cleanup is ever called here; confirm that is intended.
var global_ctx: ?*c.fh_context_t = null;
// Double-checked-locking state: lock-free fast-path flag plus the mutex that
// serializes the one-time initialization in init().
var ctx_initialized = std.atomic.Value(bool).init(false);
var init_mutex = std.Thread.Mutex{};
/// Initialize global hash context once (thread-safe).
/// Uses double-checked locking: an atomic fast path when already
/// initialized, then a mutex + re-check to serialize the first call.
/// Errors: NotAvailable when cross-compiling; ContextInitFailed when the
/// native fh_init returns null (a later call will retry).
pub fn init() !void {
    if (build_options.is_cross_compiling) {
        return HashError.NotAvailable;
    }
    // Fast path: already initialized, no lock needed.
    if (ctx_initialized.load(.seq_cst)) return;
    init_mutex.lock();
    defer init_mutex.unlock();
    if (ctx_initialized.load(.seq_cst)) return; // Double-check under the lock
    const start = std.time.milliTimestamp();
    global_ctx = c.fh_init(0); // 0 = auto-detect threads
    const elapsed = std.time.milliTimestamp() - start;
    if (global_ctx == null) {
        // Leave ctx_initialized false so a subsequent call can retry.
        return HashError.ContextInitFailed;
    }
    ctx_initialized.store(true, .seq_cst);
    std.log.info("[native] hash context initialized: {}ms", .{elapsed});
}
/// Hash a directory using the native library (reuses the global context).
/// Returns the hex-encoded SHA256 hash string; caller owns the returned
/// slice and must free it with `allocator`.
pub fn hashDirectory(allocator: std.mem.Allocator, path: []const u8) ![]const u8 {
    if (build_options.is_cross_compiling) return HashError.NotAvailable;
    // Lazy, idempotent initialization; on success global_ctx is non-null.
    try init();
    // Native side expects a NUL-terminated path.
    const path_z = try allocator.dupeZ(u8, path);
    defer allocator.free(path_z);
    const raw = c.fh_hash_directory_combined(global_ctx.?, path_z);
    if (raw == null) return HashError.HashFailed;
    defer c.fh_free_string(raw);
    // Copy the C string into allocator-owned memory before it is freed.
    return try allocator.dupe(u8, std.mem.span(raw));
}
/// Reports whether the native library has a SIMD SHA256 implementation.
/// Always false when cross-compiling (native library unavailable).
pub fn hasSimdSha256() bool {
    return if (build_options.is_cross_compiling)
        false
    else
        c.fh_has_simd_sha256() == 1;
}
/// Get the name of the SIMD implementation being used.
/// Returns "none" when cross-compiling; otherwise a static string owned by
/// the native library (do not free).
pub fn getSimdImplName() []const u8 {
    if (build_options.is_cross_compiling) return "none";
    return std.mem.span(c.fh_get_simd_impl_name());
}

View file

@@ -1,122 +0,0 @@
//! Native library bridge for high-performance operations
//!
//! Provides Zig bindings to the native/ C++ libraries:
//! - dataset_hash: SIMD-accelerated SHA256 hashing
//! - queue_index: High-performance task queue
//!
//! The native libraries provide:
//! - 78% syscall reduction for hashing
//! - 21,000x faster queue operations
//! - Hardware acceleration (SHA-NI, ARMv8 crypto)
const std = @import("std");
// Link against native dataset_hash library
const c = @cImport({
@cInclude("dataset_hash.h");
});
/// Opaque handle for native hash context (wraps the C-side fh_context_t).
pub const HashContext = opaque {};
/// Initialize hash context with thread pool.
/// num_threads: 0 = auto-detect (capped at 8)
/// Returns null if the native library fails to initialize; release the
/// returned context with cleanupHashContext.
pub fn initHashContext(num_threads: u32) ?*HashContext {
    return @ptrCast(c.fh_init(num_threads));
}
/// Release a hash context created by initHashContext.
/// Passing null is a safe no-op.
pub fn cleanupHashContext(ctx: ?*HashContext) void {
    const handle = ctx orelse return;
    c.fh_cleanup(@ptrCast(handle));
}
/// Hash a single file using the native SIMD implementation.
/// Returns a hex string allocated with the C allocator; caller must free it
/// with freeString.
pub fn hashFile(ctx: ?*HashContext, path: []const u8) ![]const u8 {
    const gpa = std.heap.c_allocator;
    // Native side needs a NUL-terminated path.
    const path_z = try gpa.dupeZ(u8, path);
    defer gpa.free(path_z);
    const raw = c.fh_hash_file(@ptrCast(ctx), path_z.ptr);
    if (raw == null) return error.HashFailed;
    defer c.fh_free_string(raw);
    // Copy into Zig-owned memory before the native buffer is released.
    return try gpa.dupe(u8, std.mem.span(raw));
}
/// Hash an entire directory (parallel, single combined result).
/// Returns a hex string allocated with the C allocator; caller must free it
/// with freeString.
pub fn hashDirectory(ctx: ?*HashContext, path: []const u8) ![]const u8 {
    const gpa = std.heap.c_allocator;
    const path_z = try gpa.dupeZ(u8, path);
    defer gpa.free(path_z);
    const raw = c.fh_hash_directory(@ptrCast(ctx), path_z.ptr);
    if (raw == null) return error.HashFailed;
    defer c.fh_free_string(raw);
    return try gpa.dupe(u8, std.mem.span(raw));
}
/// Free a string returned by hashFile/hashDirectory.
/// Note: those functions return *copies* made with the C allocator (the raw
/// native buffer is already released internally via fh_free_string), so this
/// frees the Zig-side copy — it must not be called on raw native pointers.
pub fn freeString(str: []const u8) void {
    std.heap.c_allocator.free(str);
}
/// Hash in-memory data using the native library (convenience function).
/// Writes the data to a temporary file and hashes that file.
/// Returns the first 64 hex *characters* of the hash string (not decoded
/// bytes); caller needs no cleanup — the result is a value type.
pub fn hashData(data: []const u8) ![64]u8 {
    const gpa = std.heap.c_allocator;
    // Fix: the old fixed "/tmp/fetchml_hash_tmp" name raced between
    // concurrent callers/processes; a timestamp suffix makes collisions
    // vanishingly unlikely.
    const tmp_path = try std.fmt.allocPrint(
        gpa,
        "/tmp/fetchml_hash_tmp_{d}",
        .{std.time.nanoTimestamp()},
    );
    defer gpa.free(tmp_path);
    try std.fs.cwd().writeFile(.{
        .sub_path = tmp_path,
        .data = data,
    });
    // Best-effort cleanup of the temp file.
    defer std.fs.cwd().deleteFile(tmp_path) catch {};
    const ctx = initHashContext(0) orelse return error.InitFailed;
    defer cleanupHashContext(ctx);
    const hash_str = try hashFile(ctx, tmp_path);
    defer freeString(hash_str);
    // Fix: the old unchecked `hash_str[0..64]` was out-of-bounds if the
    // native library ever returned a shorter string.
    if (hash_str.len < 64) return error.HashFailed;
    // Copy the hex characters into a fixed-size value (no hex decoding).
    var result: [64]u8 = undefined;
    @memcpy(&result, hash_str[0..64]);
    return result;
}
/// Benchmark native hashing: times `iterations` hashes of the file at
/// `path` and prints the elapsed time to stderr. Silently returns if the
/// native library cannot be initialized.
pub fn benchmark(allocator: std.mem.Allocator, path: []const u8, iterations: u32) !void {
    const ctx = initHashContext(0) orelse {
        std.debug.print("Failed to initialize native hash context\n", .{});
        return;
    };
    defer cleanupHashContext(ctx);
    var timer = try std.time.Timer.start();
    // Warm up. Fix: the original discarded the returned string with
    // `_ = try hashFile(...)`, leaking one c_allocator allocation per call.
    freeString(try hashFile(ctx, path));
    // Benchmark native
    timer.reset();
    for (0..iterations) |_| {
        const hash = try hashFile(ctx, path);
        freeString(hash);
    }
    const native_time = timer.read();
    std.debug.print("Native SIMD SHA256: {} ms for {d} iterations\n", .{
        native_time / std.time.ns_per_ms,
        iterations,
    });
    _ = allocator; // Reserved for future comparison with Zig implementation
}

View file

@@ -1,195 +0,0 @@
const std = @import("std");
const c = @cImport({
@cInclude("dataset_hash.h");
});
/// Native hash context for high-performance file hashing.
/// Wraps the C dataset_hash library. All returned strings/arrays are owned
/// by the caller and allocated with the hasher's allocator.
pub const NativeHasher = struct {
    ctx: *c.fh_context_t,
    allocator: std.mem.Allocator,

    /// Initialize native hasher with thread pool.
    /// num_threads: 0 = auto-detect (use hardware concurrency)
    /// Errors: NativeInitFailed when fh_init returns null.
    pub fn init(allocator: std.mem.Allocator, num_threads: u32) !NativeHasher {
        const ctx = c.fh_init(num_threads);
        if (ctx == null) return error.NativeInitFailed;
        return .{
            .ctx = ctx,
            .allocator = allocator,
        };
    }

    /// Cleanup native hasher and thread pool.
    pub fn deinit(self: *NativeHasher) void {
        c.fh_cleanup(self.ctx);
    }

    /// Hash a single file. Caller owns the returned hex string.
    pub fn hashFile(self: *NativeHasher, path: []const u8) ![]const u8 {
        const c_path = try self.allocator.dupeZ(u8, path);
        defer self.allocator.free(c_path);
        const result = c.fh_hash_file(self.ctx, c_path.ptr);
        if (result == null) return error.HashFailed;
        defer c.fh_free_string(result);
        return try self.allocator.dupe(u8, std.mem.span(result));
    }

    /// Batch hash multiple files (amortizes FFI overhead).
    /// Caller owns the returned array and every string in it.
    pub fn hashBatch(self: *NativeHasher, paths: []const []const u8) ![][]const u8 {
        // Convert paths to a C string array. `converted` counts initialized
        // entries so the cleanup below never frees undefined pointers.
        // (Fix: the original defer freed ALL entries even when dupeZ failed
        // part-way through, touching uninitialized memory.)
        const c_paths = try self.allocator.alloc([*c]const u8, paths.len);
        defer self.allocator.free(c_paths);
        var converted: usize = 0;
        defer {
            for (c_paths[0..converted]) |p| {
                self.allocator.free(std.mem.span(p));
            }
        }
        for (paths) |path| {
            // Must stay alive until after fh_hash_batch; freed by the defer.
            const c_path = try self.allocator.dupeZ(u8, path);
            c_paths[converted] = c_path.ptr;
            converted += 1;
        }
        // Output array filled by the native side.
        const results = try self.allocator.alloc([*c]u8, paths.len);
        defer self.allocator.free(results);
        const ret = c.fh_hash_batch(self.ctx, c_paths.ptr, @intCast(paths.len), results.ptr);
        if (ret != 0) return error.HashFailed;
        // Convert results to Zig strings. From here on the native strings in
        // `results` must be released exactly once, even on the error path.
        const hashes = self.allocator.alloc([]const u8, paths.len) catch |err| {
            for (results) |r| c.fh_free_string(r);
            return err;
        };
        var filled: usize = 0;
        errdefer {
            // Fix: only free initialized entries (the original freed the
            // whole array, including undefined slices) and release the
            // not-yet-converted native strings instead of leaking them.
            for (hashes[0..filled]) |h| self.allocator.free(h);
            for (results[filled..]) |r| c.fh_free_string(r);
            self.allocator.free(hashes);
        }
        for (results) |r| {
            hashes[filled] = try self.allocator.dupe(u8, std.mem.span(r));
            c.fh_free_string(r);
            filled += 1;
        }
        return hashes;
    }

    /// Hash entire directory (single combined hash).
    /// Caller owns the returned hex string.
    pub fn hashDirectory(self: *NativeHasher, dir_path: []const u8) ![]const u8 {
        const c_path = try self.allocator.dupeZ(u8, dir_path);
        defer self.allocator.free(c_path);
        const result = c.fh_hash_directory(self.ctx, c_path.ptr);
        if (result == null) return error.HashFailed;
        defer c.fh_free_string(result);
        return try self.allocator.dupe(u8, std.mem.span(result));
    }

    /// Hash directory with batch output (individual file hashes).
    /// Caller owns both returned arrays and all strings in them.
    pub fn hashDirectoryBatch(
        self: *NativeHasher,
        dir_path: []const u8,
        max_results: u32,
    ) !struct { hashes: [][]const u8, paths: [][]const u8, count: u32 } {
        const c_path = try self.allocator.dupeZ(u8, dir_path);
        defer self.allocator.free(c_path);
        // Output arrays filled by the native side.
        const hashes = try self.allocator.alloc([*c]u8, max_results);
        defer self.allocator.free(hashes);
        const paths = try self.allocator.alloc([*c]u8, max_results);
        defer self.allocator.free(paths);
        var count: u32 = 0;
        const ret = c.fh_hash_directory_batch(
            self.ctx,
            c_path.ptr,
            hashes.ptr,
            paths.ptr,
            max_results,
            &count,
        );
        if (ret != 0) return error.HashFailed;
        // From here the `count` native strings in hashes/paths must each be
        // released exactly once, even if an allocation below fails.
        const zig_hashes = self.allocator.alloc([]const u8, count) catch |err| {
            for (hashes[0..count], paths[0..count]) |h, p| {
                c.fh_free_string(h);
                c.fh_free_string(p);
            }
            return err;
        };
        const zig_paths = self.allocator.alloc([]const u8, count) catch |err| {
            for (hashes[0..count], paths[0..count]) |h, p| {
                c.fh_free_string(h);
                c.fh_free_string(p);
            }
            self.allocator.free(zig_hashes);
            return err;
        };
        // Fix: the original errdefers freed every element of zig_hashes /
        // zig_paths even when only some were initialized (undefined frees),
        // and leaked the remaining native strings. `filled` counts fully
        // converted pairs so cleanup is exact.
        var filled: usize = 0;
        errdefer {
            for (zig_hashes[0..filled]) |h| self.allocator.free(h);
            for (zig_paths[0..filled]) |p| self.allocator.free(p);
            for (filled..count) |i| {
                c.fh_free_string(hashes[i]);
                c.fh_free_string(paths[i]);
            }
            self.allocator.free(zig_hashes);
            self.allocator.free(zig_paths);
        }
        for (0..count) |i| {
            // Duplicate both strings before freeing either native buffer so
            // a failure leaves a consistent state for the errdefer above.
            const h_copy = try self.allocator.dupe(u8, std.mem.span(hashes[i]));
            errdefer self.allocator.free(h_copy);
            const p_copy = try self.allocator.dupe(u8, std.mem.span(paths[i]));
            zig_hashes[i] = h_copy;
            zig_paths[i] = p_copy;
            c.fh_free_string(hashes[i]);
            c.fh_free_string(paths[i]);
            filled += 1;
        }
        return .{
            .hashes = zig_hashes,
            .paths = zig_paths,
            .count = count,
        };
    }

    /// Check if SIMD SHA-256 is available in the native library.
    pub fn hasSimd(self: *NativeHasher) bool {
        _ = self;
        return c.fh_has_simd_sha256() != 0;
    }

    /// Get implementation info (SIMD type, etc.).
    /// Returned string is owned by the native library; do not free.
    pub fn getImplInfo(self: *NativeHasher) []const u8 {
        _ = self;
        return std.mem.span(c.fh_get_simd_impl_name());
    }
};
/// Convenience function: hash a directory using the native library.
/// Caller owns the returned hex string (free with `allocator`).
pub fn hashDirectoryNative(allocator: std.mem.Allocator, dir_path: []const u8) ![]const u8 {
    var native = try NativeHasher.init(allocator, 0); // 0 = auto-detect threads
    defer native.deinit();
    return native.hashDirectory(dir_path);
}
/// Convenience function: batch hash files using the native library.
/// Caller owns the returned array and every string in it.
pub fn hashFilesNative(
    allocator: std.mem.Allocator,
    paths: []const []const u8,
) ![][]const u8 {
    var native = try NativeHasher.init(allocator, 0); // auto-detect threads
    defer native.deinit();
    return native.hashBatch(paths);
}
test "NativeHasher basic operations" {
    const allocator = std.testing.allocator;
    // Skip gracefully when the native library is not linked in.
    var hasher = NativeHasher.init(allocator, 1) catch |err| {
        if (err != error.NativeInitFailed) return err;
        std.debug.print("Native library not available, skipping test\n", .{});
        return;
    };
    defer hasher.deinit();
    // Smoke-check the introspection API.
    const has_simd = hasher.hasSimd();
    const impl_name = hasher.getImplInfo();
    std.debug.print("SIMD: {any}, Impl: {s}\n", .{ has_simd, impl_name });
}