From 1a35c5430011b8fd47fe1b942d2f5473908c4993 Mon Sep 17 00:00:00 2001
From: Jeremie Fraeys
Date: Sat, 21 Feb 2026 14:08:07 -0500
Subject: [PATCH] feat: integrate native library into Zig CLI

- Add cli/src/native/hash.zig - C ABI wrapper for dataset_hash
- Update cli/src/commands/dataset.zig - Add 'hash' subcommand
- Update cli/build.zig - Link against libdataset_hash.so
- Fix pre-existing CLI errors in experiment.zig (errorMsg signatures, columnInt64)

Usage: ml dataset hash <path>

Note: Additional pre-existing CLI errors remain in sync.zig
---
Review notes (text in this section is ignored by `git am`):
- Hunk line counts and the diffstat below were updated by hand for the
  added doc comments and fixes; the blob ids on the `index` lines are
  unchanged from the original patch.
- NOTE(review): the message lists cli/build.zig, but this patch contains
  no build.zig hunk - confirm the link step lands elsewhere.

 cli/src/commands/dataset.zig      | 43 +++++++++++++++++++
 cli/src/commands/dataset_hash.zig | 57 +++++++++++++++++++++++++
 cli/src/commands/experiment.zig   | 10 ++++--
 cli/src/native/hash.zig           | 72 +++++++++++++++++++++++++++++++
 4 files changed, 179 insertions(+), 3 deletions(-)
 create mode 100644 cli/src/commands/dataset_hash.zig
 create mode 100644 cli/src/native/hash.zig

diff --git a/cli/src/commands/dataset.zig b/cli/src/commands/dataset.zig
index 6a86844..f778475 100644
--- a/cli/src/commands/dataset.zig
+++ b/cli/src/commands/dataset.zig
@@ -5,6 +5,7 @@ const colors = @import("../utils/colors.zig");
 const logging = @import("../utils/logging.zig");
 const crypto = @import("../utils/crypto.zig");
 const core = @import("../core.zig");
+const native_hash = @import("../native/hash.zig");
 
 const DatasetOptions = struct {
     dry_run: bool = false,
@@ -76,6 +77,9 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
                 const options = DatasetOptions{ .json = flags.json, .validate = validate };
                 try verifyDataset(allocator, positional.items[1], &options);
                 return;
+            } else if (std.mem.eql(u8, action, "hash")) {
+                try hashDataset(allocator, positional.items[1]);
+                return;
             }
         },
         3 => {
@@ -102,6 +106,7 @@ fn printUsage() void {
     colors.printInfo(" info Show dataset information\n", .{});
     colors.printInfo(" search Search datasets by name/description\n", .{});
     colors.printInfo(" verify Verify dataset integrity\n", .{});
+    colors.printInfo(" hash Compute native SHA256 hash\n", .{});
     colors.printInfo("\nOptions:\n", .{});
     colors.printInfo(" --dry-run Show what would be requested\n", .{});
     colors.printInfo(" --validate Validate inputs only (no request)\n", .{});
@@ -440,6 +445,44 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con
     }
 }
 
+/// Print the native-library SHA256 of the dataset directory at `path`.
+/// Reports which SHA256 implementation is in use, then the hash itself.
+/// On failure prints a message for the specific error and returns it.
+fn hashDataset(allocator: std.mem.Allocator, path: []const u8) !void {
+    colors.printInfo("Computing native SHA256 hash for: {s}\n", .{path});
+
+    // Check SIMD availability
+    if (!native_hash.hasSimdSha256()) {
+        colors.printWarning("SIMD SHA256 not available, using generic implementation\n", .{});
+    } else {
+        const impl_name = native_hash.getSimdImplName();
+        colors.printInfo("Using {s} SHA256 implementation\n", .{impl_name});
+    }
+
+    // Compute hash using native library
+    const hash = native_hash.hashDirectory(allocator, path) catch |err| {
+        switch (err) {
+            error.ContextInitFailed => {
+                colors.printError("Failed to initialize native hash context\n", .{});
+            },
+            error.HashFailed => {
+                colors.printError("Hash computation failed\n", .{});
+            },
+            error.InvalidPath => {
+                colors.printError("Invalid path: {s}\n", .{path});
+            },
+            error.OutOfMemory => {
+                colors.printError("Out of memory\n", .{});
+            },
+        }
+        return err;
+    };
+    defer allocator.free(hash);
+
+    // Print result
+    colors.printSuccess("SHA256: {s}\n", .{hash});
+}
+
 fn writeJSONString(writer: anytype, s: []const u8) !void {
     try writer.writeByte('"');
     for (s) |c| {
diff --git a/cli/src/commands/dataset_hash.zig b/cli/src/commands/dataset_hash.zig
new file mode 100644
index 0000000..9c1cb92
--- /dev/null
+++ b/cli/src/commands/dataset_hash.zig
@@ -0,0 +1,57 @@
+const std = @import("std");
+// NOTE(review): `cli` is unused here and `../../` points above cli/src - confirm or drop.
+const cli = @import("../../main.zig");
+const native_hash = @import("../native/hash.zig");
+// NOTE(review): sibling dataset.zig has no ui import to compare against - confirm this path.
+const ui = @import("../../ui/ui.zig");
+const colors = @import("../utils/colors.zig");
+
+pub const name = "dataset hash";
+pub const description = "Hash a dataset directory using native SHA256 library";
+
+/// Hash the dataset directory given as `args[0]` and print the result.
+/// Prints help and returns when no path argument is supplied.
+pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
+    // Parse arguments
+    if (args.len < 1) {
+        try ui.printHelp(name, description, &.{
+            .{ "", "Path to dataset directory" },
+        });
+        return;
+    }
+
+    const path = args[0];
+
+    // Report which SHA256 implementation the native library will use
+    if (!native_hash.hasSimdSha256()) {
+        colors.printWarning("SIMD SHA256 not available, using generic implementation\n", .{});
+    } else {
+        const impl_name = native_hash.getSimdImplName();
+        colors.printInfo("Using {s} SHA256 implementation\n", .{impl_name});
+    }
+
+    // Hash the directory
+    colors.printInfo("Hashing dataset at: {s}\n", .{path});
+
+    const hash = native_hash.hashDirectory(allocator, path) catch |err| {
+        switch (err) {
+            error.ContextInitFailed => {
+                colors.printError("Failed to initialize native hash context\n", .{});
+            },
+            error.HashFailed => {
+                colors.printError("Hash computation failed\n", .{});
+            },
+            error.InvalidPath => {
+                colors.printError("Invalid path: {s}\n", .{path});
+            },
+            error.OutOfMemory => {
+                colors.printError("Out of memory\n", .{});
+            },
+        }
+        return err;
+    };
+    defer allocator.free(hash);
+
+    // Print result
+    colors.printSuccess("Dataset hash: {s}\n", .{hash});
+}
diff --git a/cli/src/commands/experiment.zig b/cli/src/commands/experiment.zig
index cf14536..77127a7 100644
--- a/cli/src/commands/experiment.zig
+++ b/cli/src/commands/experiment.zig
@@ -51,7 +51,9 @@ pub fn execute(allocator: std.mem.Allocator, args: []const []const u8) !void {
     } else if (std.mem.eql(u8, subcommand, "show")) {
         return try showExperiment(allocator, sub_args, flags.json);
     } else {
-        core.output.errorMsg("experiment", "Unknown subcommand: {s}", .{subcommand});
+        const msg = try std.fmt.allocPrint(allocator, "Unknown subcommand: {s}", .{subcommand});
+        defer allocator.free(msg);
+        core.output.errorMsg("experiment", msg);
         return printUsage();
     }
 }
@@ -283,7 +285,9 @@ fn showExperiment(allocator: std.mem.Allocator, args: []const []const u8, json:
     try db.DB.bindText(exp_stmt, 1, exp_id);
 
     if (!try db.DB.step(exp_stmt)) {
-        core.output.errorMsg("experiment", "Experiment not found: {s}", .{exp_id});
+        const msg = try std.fmt.allocPrint(allocator, "Experiment not found: {s}", .{exp_id});
+        defer allocator.free(msg);
+        core.output.errorMsg("experiment", msg);
         return error.NotFound;
     }
 
@@ -291,7 +295,7 @@ fn showExperiment(allocator: std.mem.Allocator, args: []const []const u8, json:
     const description = db.DB.columnText(exp_stmt, 2);
     const created_at = db.DB.columnText(exp_stmt, 3);
     const status = db.DB.columnText(exp_stmt, 4);
-    const synced = db.DB.columnInt(exp_stmt, 5) != 0;
+    const synced = db.DB.columnInt64(exp_stmt, 5) != 0;
 
     // Get run count and last run date
     const runs_sql =
diff --git a/cli/src/native/hash.zig b/cli/src/native/hash.zig
new file mode 100644
index 0000000..943472f
--- /dev/null
+++ b/cli/src/native/hash.zig
@@ -0,0 +1,72 @@
+//! Zig wrapper around the C ABI declared in `dataset_hash.h`.
+
+const std = @import("std");
+const c = @cImport({
+    @cInclude("dataset_hash.h");
+});
+
+/// Errors returned by the wrappers in this file.
+pub const HashError = error{
+    ContextInitFailed,
+    HashFailed,
+    InvalidPath,
+    OutOfMemory,
+};
+
+/// Create a native hash context with `fh_init`.
+/// Returns `error.ContextInitFailed` when the library hands back null.
+/// Release the context with `cleanupContext`.
+pub fn initContext() HashError!*c.fh_context_t {
+    // 0 = auto-detect threads
+    const ctx = c.fh_init(0) orelse return HashError.ContextInitFailed;
+    return ctx;
+}
+
+/// Release a context obtained from `initContext`.
+pub fn cleanupContext(ctx: *c.fh_context_t) void {
+    c.fh_cleanup(ctx);
+}
+
+/// Hash the directory at `path` with the native library.
+/// Returns a copy of the string produced by `fh_hash_directory_combined`;
+/// the caller owns the slice and frees it with `allocator`.
+/// Errors: `InvalidPath` if `path` contains a NUL byte, `ContextInitFailed`,
+/// `HashFailed` if the library returns null, `OutOfMemory`.
+pub fn hashDirectory(allocator: std.mem.Allocator, path: []const u8) HashError![]const u8 {
+    // An embedded NUL would silently truncate the C string handed to the
+    // library, so a different path than requested would be hashed.
+    if (std.mem.indexOfScalar(u8, path, 0) != null) {
+        return HashError.InvalidPath;
+    }
+
+    const ctx = try initContext();
+    defer cleanupContext(ctx);
+
+    // The C API takes a NUL-terminated string.
+    const c_path = try allocator.dupeZ(u8, path);
+    defer allocator.free(c_path);
+
+    const result = c.fh_hash_directory_combined(ctx, c_path.ptr) orelse {
+        return HashError.HashFailed;
+    };
+    defer c.fh_free_string(result);
+
+    // Copy out of library-owned memory before the deferred free runs.
+    return try allocator.dupe(u8, std.mem.span(result));
+}
+
+/// Report whether `fh_has_simd_sha256` returned 1.
+pub fn hasSimdSha256() bool {
+    return c.fh_has_simd_sha256() == 1;
+}
+
+/// Name of the SHA256 implementation reported by the native library,
+/// or "unknown" when the library returns null.
+/// NOTE(review): the slice points at library-owned memory; presumably a
+/// static string - confirm before storing it.
+pub fn getSimdImplName() []const u8 {
+    const name = c.fh_get_simd_impl_name();
+    // std.mem.span asserts on a null C pointer, so handle null first.
+    if (name == null) return "unknown";
+    return std.mem.span(name);
+}