refactor(cli): remove deprecated native hash modules
Remove obsolete native hash implementation files: - Delete native/hash.zig (superseded by utils/hash.zig) - Delete utils/native_bridge.zig (replaced by direct TLS) - Delete utils/native_hash.zig (consolidated into utils/hash.zig) Cleanup as part of CLI hardening.
This commit is contained in:
parent
4c2af17ad6
commit
6a0555207e
3 changed files with 0 additions and 420 deletions
|
|
@ -1,103 +0,0 @@
|
|||
const std = @import("std");
|
||||
const build_options = @import("build_options");
|
||||
|
||||
/// Errors surfaced by the native hashing wrapper in this file.
pub const HashError = error{
    /// The native library is unavailable (returned when cross-compiling).
    NotAvailable,
    /// `fh_init` returned a null context.
    ContextInitFailed,
    /// The native hash call returned a null result.
    HashFailed,
    /// Declared for callers; not returned by the code visible in this file.
    InvalidPath,
    /// Allocation failure.
    OutOfMemory,
};
|
||||
|
||||
// C bindings for the native hashing library. Native builds import the real
// header; cross-compiled builds get an inert stand-in exposing the same names
// so the rest of this file still type-checks.
const c = if (!build_options.is_cross_compiling)
    @cImport({
        @cInclude("dataset_hash.h");
    })
else
    struct {
        pub const fh_context_t = opaque {};

        /// Stub: never produces a context.
        pub fn fh_init(_: i32) ?*fh_context_t {
            return null;
        }

        /// Stub: never produces a hash string.
        pub fn fh_hash_directory_combined(_: *fh_context_t, _: [*c]const u8) [*c]u8 {
            return null;
        }

        /// Stub: nothing to release.
        pub fn fh_free_string(_: [*c]u8) void {}

        /// Stub: reports no SIMD support.
        pub fn fh_has_simd_sha256() i32 {
            return 0;
        }

        /// Stub: reports the implementation name "none".
        pub fn fh_get_simd_impl_name() [*c]const u8 {
            return "none";
        }
    };
|
||||
|
||||
// Process-wide hash context shared by every hash call in this file.
var global_ctx: ?*c.fh_context_t = null;
// Set to true only after `global_ctx` holds a non-null context.
var ctx_initialized = std.atomic.Value(bool).init(false);
// Serializes the one-time creation of `global_ctx`.
var init_mutex = std.Thread.Mutex{};

/// Create the shared hash context if it does not exist yet.
///
/// Returns `HashError.NotAvailable` when cross-compiling and
/// `HashError.ContextInitFailed` when `fh_init` yields null. A failed attempt
/// leaves the initialized flag unset, so a later call tries again.
pub fn init() !void {
    if (build_options.is_cross_compiling) return HashError.NotAvailable;

    // Cheap check before taking the lock.
    if (ctx_initialized.load(.seq_cst)) return;

    init_mutex.lock();
    defer init_mutex.unlock();

    // Another thread may have finished initialization while we waited.
    if (ctx_initialized.load(.seq_cst)) return;

    const began_ms = std.time.milliTimestamp();
    const handle: ?*c.fh_context_t = c.fh_init(0); // 0 = auto-detect threads
    const took_ms = std.time.milliTimestamp() - began_ms;

    global_ctx = handle;
    if (handle == null) return HashError.ContextInitFailed;

    ctx_initialized.store(true, .seq_cst);
    std.log.info("[native] hash context initialized: {}ms", .{took_ms});
}
|
||||
|
||||
/// Hash a directory with the native library, reusing the shared context.
///
/// Returns the string produced by `fh_hash_directory_combined`, copied into
/// memory owned by `allocator`; the caller frees it with `allocator.free`.
///
/// Errors: `HashError.NotAvailable` when cross-compiling, any error from
/// `init()`, `HashError.HashFailed` when the native call returns null, and
/// `error.OutOfMemory` from the allocator.
pub fn hashDirectory(allocator: std.mem.Allocator, path: []const u8) ![]const u8 {
    if (build_options.is_cross_compiling) {
        return HashError.NotAvailable;
    }

    try init(); // Idempotent initialization

    // init() succeeded, so the context should be set; return an error rather
    // than unwrapping with `.?` in case that invariant is ever broken.
    const ctx = global_ctx orelse return HashError.ContextInitFailed;

    // The C API needs a NUL-terminated path.
    const c_path = try allocator.dupeZ(u8, path);
    defer allocator.free(c_path);

    // Pass the raw pointer explicitly: a slice does not coerce to a C pointer.
    const result = c.fh_hash_directory_combined(ctx, c_path.ptr);
    if (result == null) {
        return HashError.HashFailed;
    }
    // The native string is released once it has been copied below.
    defer c.fh_free_string(result);

    return try allocator.dupe(u8, std.mem.span(result));
}
|
||||
|
||||
/// Report whether the native library says SIMD SHA256 is available.
/// Always false when cross-compiling; otherwise true only when
/// `fh_has_simd_sha256()` returns exactly 1.
pub fn hasSimdSha256() bool {
    return !build_options.is_cross_compiling and c.fh_has_simd_sha256() == 1;
}
|
||||
|
||||
/// Name of the SIMD implementation reported by the native library.
/// Returns "none" when cross-compiling. Otherwise the slice views the string
/// returned by `fh_get_simd_impl_name()` directly; it is not a copy.
pub fn getSimdImplName() []const u8 {
    return if (build_options.is_cross_compiling)
        "none"
    else
        std.mem.span(c.fh_get_simd_impl_name());
}
|
||||
|
|
@ -1,122 +0,0 @@
|
|||
//! Native library bridge for high-performance operations
|
||||
//!
|
||||
//! Provides Zig bindings to the native/ C++ libraries:
|
||||
//! - dataset_hash: SIMD-accelerated SHA256 hashing
|
||||
//! - queue_index: High-performance task queue
|
||||
//!
|
||||
//! The native libraries provide:
|
||||
//! - 78% syscall reduction for hashing
|
||||
//! - 21,000x faster queue operations
|
||||
//! - Hardware acceleration (SHA-NI, ARMv8 crypto)
|
||||
|
||||
const std = @import("std");
|
||||
|
||||
// Link against native dataset_hash library
|
||||
const c = @cImport({
|
||||
@cInclude("dataset_hash.h");
|
||||
});
|
||||
|
||||
/// Opaque handle standing in for the native hash context.
pub const HashContext = opaque {};

/// Create a native hash context.
/// `num_threads` is forwarded to `fh_init`; the original note says 0 means
/// auto-detect (capped at 8) — that cap is not verifiable from this file.
/// Returns null when `fh_init` returns null.
pub fn initHashContext(num_threads: u32) ?*HashContext {
    const native_ctx = c.fh_init(num_threads);
    return @ptrCast(native_ctx);
}

/// Release a context obtained from `initHashContext`. A null `ctx` is a no-op.
pub fn cleanupHashContext(ctx: ?*HashContext) void {
    const live = ctx orelse return;
    c.fh_cleanup(@ptrCast(live));
}
|
||||
|
||||
/// Hash one file through the native `fh_hash_file` call.
///
/// The returned slice is a copy allocated with `std.heap.c_allocator`;
/// release it with `freeString`. Returns `error.HashFailed` when the native
/// call yields null.
pub fn hashFile(ctx: ?*HashContext, path: []const u8) ![]const u8 {
    const alloc = std.heap.c_allocator;

    // The C API needs a NUL-terminated path.
    const path_z = try alloc.dupeZ(u8, path);
    defer alloc.free(path_z);

    const native_hex = c.fh_hash_file(@ptrCast(ctx), path_z.ptr);
    if (native_hex == null) return error.HashFailed;
    // The native string is released after it has been copied.
    defer c.fh_free_string(native_hex);

    return try alloc.dupe(u8, std.mem.span(native_hex));
}
|
||||
|
||||
/// Hash a whole directory through the native `fh_hash_directory` call.
///
/// The returned slice is a copy allocated with `std.heap.c_allocator`;
/// release it with `freeString`. Returns `error.HashFailed` when the native
/// call yields null.
pub fn hashDirectory(ctx: ?*HashContext, path: []const u8) ![]const u8 {
    const alloc = std.heap.c_allocator;

    // The C API needs a NUL-terminated path.
    const path_z = try alloc.dupeZ(u8, path);
    defer alloc.free(path_z);

    const native_hex = c.fh_hash_directory(@ptrCast(ctx), path_z.ptr);
    if (native_hex == null) return error.HashFailed;
    // The native string is released after it has been copied.
    defer c.fh_free_string(native_hex);

    return try alloc.dupe(u8, std.mem.span(native_hex));
}
|
||||
|
||||
/// Release a slice with `std.heap.c_allocator`, the allocator `hashFile` and
/// `hashDirectory` in this file use for their returned copies.
pub fn freeString(str: []const u8) void {
    const alloc = std.heap.c_allocator;
    alloc.free(str);
}
|
||||
|
||||
/// Hash an in-memory buffer by writing it to a temporary file and hashing
/// that file with `hashFile`.
///
/// Returns the first 64 bytes of the hash string exactly as `hashFile`
/// produced them (hex text; no decoding is performed).
///
/// Errors: file-system errors from creating or writing the temporary file,
/// `error.InitFailed` when no hash context can be created, and
/// `error.HashFailed` when hashing fails or yields fewer than 64 characters.
pub fn hashData(data: []const u8) ![64]u8 {
    // Use a per-call random suffix so concurrent callers do not overwrite each
    // other's file and the name cannot be predicted in advance.
    var path_buf: [64]u8 = undefined;
    const tmp_path = try std.fmt.bufPrint(
        &path_buf,
        "/tmp/fetchml_hash_tmp_{x}",
        .{std.crypto.random.int(u64)},
    );

    // Exclusive creation fails instead of writing through an existing file.
    const tmp_file = try std.fs.cwd().createFile(tmp_path, .{ .exclusive = true });
    // Best-effort cleanup: a failed delete must not mask the hash result.
    defer std.fs.cwd().deleteFile(tmp_path) catch {};
    {
        // Close before hashing so the native side reads a finished file.
        defer tmp_file.close();
        try tmp_file.writeAll(data);
    }

    const ctx = initHashContext(0) orelse return error.InitFailed;
    defer cleanupHashContext(ctx);

    const hash_str = try hashFile(ctx, tmp_path);
    defer freeString(hash_str);

    // Guard the fixed-size copy: a shorter string would read out of bounds.
    if (hash_str.len < 64) return error.HashFailed;
    return hash_str[0..64].*;
}
|
||||
|
||||
/// Time `iterations` native hashes of the file at `path` and print the total
/// in milliseconds to stderr.
///
/// `allocator` is currently unused (reserved for a future comparison with a
/// Zig implementation). If the native context cannot be created, a message is
/// printed and the function returns without error. Errors from
/// `std.time.Timer.start` and `hashFile` are propagated.
pub fn benchmark(allocator: std.mem.Allocator, path: []const u8, iterations: u32) !void {
    _ = allocator; // Reserved for future comparison with Zig implementation

    const ctx = initHashContext(0) orelse {
        std.debug.print("Failed to initialize native hash context\n", .{});
        return;
    };
    defer cleanupHashContext(ctx);

    var timer = try std.time.Timer.start();

    // Warm up once; the returned copy must be freed like any other result.
    const warm_up = try hashFile(ctx, path);
    freeString(warm_up);

    // Benchmark native
    timer.reset();
    for (0..iterations) |_| {
        const hash = try hashFile(ctx, path);
        freeString(hash);
    }
    const native_time = timer.read();

    std.debug.print("Native SIMD SHA256: {} ms for {d} iterations\n", .{
        native_time / std.time.ns_per_ms,
        iterations,
    });
}
|
||||
|
|
@ -1,195 +0,0 @@
|
|||
const std = @import("std");
|
||||
const c = @cImport({
|
||||
@cInclude("dataset_hash.h");
|
||||
});
|
||||
|
||||
/// Wrapper around a native hash context from `dataset_hash.h`.
///
/// Every slice returned by the hashing methods is allocated with the
/// `allocator` given to `init`, and the caller is responsible for freeing it.
pub const NativeHasher = struct {
    ctx: *c.fh_context_t,
    allocator: std.mem.Allocator,

    /// Create a hasher backed by a native context.
    /// `num_threads` is forwarded to `fh_init`; the original note says 0 means
    /// auto-detect (hardware concurrency).
    /// Returns `error.NativeInitFailed` when `fh_init` returns null.
    pub fn init(allocator: std.mem.Allocator, num_threads: u32) !NativeHasher {
        // Go through an optional pointer so the null check and the unwrap
        // type-check whether the binding yields `?*T` or `[*c]T`.
        const ctx: ?*c.fh_context_t = c.fh_init(num_threads);
        return .{
            .ctx = ctx orelse return error.NativeInitFailed,
            .allocator = allocator,
        };
    }

    /// Release the native context.
    pub fn deinit(self: *NativeHasher) void {
        c.fh_cleanup(self.ctx);
    }

    /// Hash a single file. Caller owns the returned slice.
    /// Returns `error.HashFailed` when the native call yields null.
    pub fn hashFile(self: *NativeHasher, path: []const u8) ![]const u8 {
        const c_path = try self.allocator.dupeZ(u8, path);
        defer self.allocator.free(c_path);

        const result = c.fh_hash_file(self.ctx, c_path.ptr);
        if (result == null) return error.HashFailed;
        defer c.fh_free_string(result);

        return try self.allocator.dupe(u8, std.mem.span(result));
    }

    /// Hash several files with one native call.
    ///
    /// Returns one hash per input path, in input order. Caller owns the outer
    /// slice and each element. An empty `paths` returns an empty slice without
    /// calling into the native library.
    /// Returns `error.HashFailed` when the native call reports failure or
    /// leaves a result slot null.
    pub fn hashBatch(self: *NativeHasher, paths: []const []const u8) ![][]const u8 {
        if (paths.len == 0) return try self.allocator.alloc([]const u8, 0);

        // Keep the NUL-terminated copies as Zig slices so they are freed with
        // their true length, and only as many as were actually created.
        const owned_paths = try self.allocator.alloc([:0]u8, paths.len);
        var converted: usize = 0;
        defer {
            for (owned_paths[0..converted]) |p| self.allocator.free(p);
            self.allocator.free(owned_paths);
        }

        const c_paths = try self.allocator.alloc([*c]const u8, paths.len);
        defer self.allocator.free(c_paths);

        for (paths, 0..) |path, i| {
            owned_paths[i] = try self.allocator.dupeZ(u8, path);
            converted += 1;
            c_paths[i] = owned_paths[i].ptr;
        }

        const results = try self.allocator.alloc([*c]u8, paths.len);
        defer self.allocator.free(results);
        // Start from null so an unwritten slot is detectable, not garbage.
        @memset(results, null);

        const ret = c.fh_hash_batch(self.ctx, c_paths.ptr, @intCast(paths.len), results.ptr);
        // NOTE(review): ownership of partially written results on failure is
        // not visible here, so nothing is freed on this path — confirm
        // against dataset_hash.h.
        if (ret != 0) return error.HashFailed;

        // Native strings are released on every exit path from here on.
        defer freeNativeStrings(results);
        return try self.copyNativeStrings(results);
    }

    /// Hash an entire directory into one combined hash.
    /// Caller owns the returned slice.
    /// Returns `error.HashFailed` when the native call yields null.
    pub fn hashDirectory(self: *NativeHasher, dir_path: []const u8) ![]const u8 {
        const c_path = try self.allocator.dupeZ(u8, dir_path);
        defer self.allocator.free(c_path);

        const result = c.fh_hash_directory(self.ctx, c_path.ptr);
        if (result == null) return error.HashFailed;
        defer c.fh_free_string(result);

        return try self.allocator.dupe(u8, std.mem.span(result));
    }

    /// Hash a directory and return per-file hashes with their paths.
    ///
    /// At most `max_results` entries are returned. Caller owns both outer
    /// slices and every element; `count` equals the length of each slice.
    /// Returns `error.HashFailed` when the native call reports failure or
    /// leaves a reported slot null.
    pub fn hashDirectoryBatch(
        self: *NativeHasher,
        dir_path: []const u8,
        max_results: u32,
    ) !struct { hashes: [][]const u8, paths: [][]const u8, count: u32 } {
        const c_path = try self.allocator.dupeZ(u8, dir_path);
        defer self.allocator.free(c_path);

        // Output arrays filled in by the native call.
        const hashes = try self.allocator.alloc([*c]u8, max_results);
        defer self.allocator.free(hashes);
        @memset(hashes, null);

        const paths = try self.allocator.alloc([*c]u8, max_results);
        defer self.allocator.free(paths);
        @memset(paths, null);

        var count: u32 = 0;

        const ret = c.fh_hash_directory_batch(
            self.ctx,
            c_path.ptr,
            hashes.ptr,
            paths.ptr,
            max_results,
            &count,
        );
        if (ret != 0) return error.HashFailed;

        // Never read past the arrays we allocated, whatever count is reported.
        const n: u32 = @min(count, max_results);

        // Native strings are released on every exit path from here on.
        defer {
            freeNativeStrings(hashes[0..n]);
            freeNativeStrings(paths[0..n]);
        }

        const zig_hashes = try self.copyNativeStrings(hashes[0..n]);
        errdefer self.freeOwnedStrings(zig_hashes);

        const zig_paths = try self.copyNativeStrings(paths[0..n]);

        return .{
            .hashes = zig_hashes,
            .paths = zig_paths,
            .count = n,
        };
    }

    /// Report whether the native library says SIMD SHA-256 is available
    /// (`fh_has_simd_sha256()` returned non-zero).
    pub fn hasSimd(self: *NativeHasher) bool {
        _ = self;
        return c.fh_has_simd_sha256() != 0;
    }

    /// Implementation name reported by the native library. The slice views the
    /// string returned by `fh_get_simd_impl_name()` directly; it is not a copy.
    pub fn getImplInfo(self: *NativeHasher) []const u8 {
        _ = self;
        return std.mem.span(c.fh_get_simd_impl_name());
    }

    /// Copy each native string into allocator-owned memory. On failure every
    /// copy made so far is freed. The native strings are left untouched.
    fn copyNativeStrings(self: *NativeHasher, native: []const [*c]u8) ![][]const u8 {
        const owned = try self.allocator.alloc([]const u8, native.len);
        var filled: usize = 0;
        errdefer {
            // Only the first `filled` entries are initialized.
            for (owned[0..filled]) |s| self.allocator.free(s);
            self.allocator.free(owned);
        }

        for (native) |s| {
            if (s == null) return error.HashFailed;
            owned[filled] = try self.allocator.dupe(u8, std.mem.span(s));
            filled += 1;
        }
        return owned;
    }

    /// Free a slice of allocator-owned strings and the slice itself.
    fn freeOwnedStrings(self: *NativeHasher, owned: [][]const u8) void {
        for (owned) |s| self.allocator.free(s);
        self.allocator.free(owned);
    }

    /// Hand every non-null native string back to the native library.
    fn freeNativeStrings(native: []const [*c]u8) void {
        for (native) |s| {
            if (s != null) c.fh_free_string(s);
        }
    }
};
|
||||
|
||||
/// One-shot directory hash: creates a temporary `NativeHasher` (thread count
/// 0 = auto-detect), hashes `dir_path`, and tears the hasher down again.
/// Caller owns the returned slice, allocated with `allocator`.
pub fn hashDirectoryNative(allocator: std.mem.Allocator, dir_path: []const u8) ![]const u8 {
    var native = try NativeHasher.init(allocator, 0);
    defer native.deinit();

    return native.hashDirectory(dir_path);
}
|
||||
|
||||
/// One-shot batch hash: creates a temporary `NativeHasher` (thread count 0),
/// hashes every entry of `paths`, and tears the hasher down again.
/// Caller owns the returned slice and its elements, allocated with `allocator`.
pub fn hashFilesNative(
    allocator: std.mem.Allocator,
    paths: []const []const u8,
) ![][]const u8 {
    var native = try NativeHasher.init(allocator, 0);
    defer native.deinit();

    return native.hashBatch(paths);
}
|
||||
|
||||
test "NativeHasher basic operations" {
    // Treat a missing native library as a skip; any other error fails the test.
    var hasher = NativeHasher.init(std.testing.allocator, 1) catch |err| {
        if (err != error.NativeInitFailed) return err;
        std.debug.print("Native library not available, skipping test\n", .{});
        return;
    };
    defer hasher.deinit();

    // Query and print the SIMD capability information.
    const simd_available = hasher.hasSimd();
    const impl = hasher.getImplInfo();
    std.debug.print("SIMD: {any}, Impl: {s}\n", .{ simd_available, impl });
}
|
||||
Loading…
Reference in a new issue