feat: integrate native library into Zig CLI

- Add cli/src/native/hash.zig - C ABI wrapper for dataset_hash
- Update cli/src/commands/dataset.zig - Add 'hash' subcommand
- Update cli/build.zig - Link against libdataset_hash.so
- Fix pre-existing CLI errors in experiment.zig (errorMsg signatures, columnInt64)

Usage: ml dataset hash <path>

Note: Additional pre-existing CLI errors remain in sync.zig
This commit is contained in:
Jeremie Fraeys 2026-02-21 14:08:07 -05:00
parent 4b2ee75072
commit 1a35c54300
No known key found for this signature in database
4 changed files with 158 additions and 3 deletions

View file

@ -5,6 +5,7 @@ const colors = @import("../utils/colors.zig");
const logging = @import("../utils/logging.zig");
const crypto = @import("../utils/crypto.zig");
const core = @import("../core.zig");
const native_hash = @import("../native/hash.zig");
const DatasetOptions = struct {
dry_run: bool = false,
@ -76,6 +77,9 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
const options = DatasetOptions{ .json = flags.json, .validate = validate };
try verifyDataset(allocator, positional.items[1], &options);
return;
} else if (std.mem.eql(u8, action, "hash")) {
try hashDataset(allocator, positional.items[1]);
return;
}
},
3 => {
@ -102,6 +106,7 @@ fn printUsage() void {
colors.printInfo(" info <name> Show dataset information\n", .{});
colors.printInfo(" search <term> Search datasets by name/description\n", .{});
colors.printInfo(" verify <path|id> Verify dataset integrity\n", .{});
colors.printInfo(" hash <path> Compute native SHA256 hash\n", .{});
colors.printInfo("\nOptions:\n", .{});
colors.printInfo(" --dry-run Show what would be requested\n", .{});
colors.printInfo(" --validate Validate inputs only (no request)\n", .{});
@ -440,6 +445,41 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con
}
}
/// Compute the native SHA256 hash of the dataset at `path` and print it.
///
/// First reports which SHA256 implementation the native library will use,
/// then calls `native_hash.hashDirectory`. When hashing fails, a message
/// matching the specific error is printed and the same error is returned to
/// the caller. The hash string is freed with `allocator` before returning.
fn hashDataset(allocator: std.mem.Allocator, path: []const u8) !void {
    colors.printInfo("Computing native SHA256 hash for: {s}\n", .{path});

    // Tell the user whether an accelerated implementation is in play.
    if (native_hash.hasSimdSha256()) {
        colors.printInfo("Using {s} SHA256 implementation\n", .{native_hash.getSimdImplName()});
    } else {
        colors.printWarning("SIMD SHA256 not available, using generic implementation\n", .{});
    }

    // Hash through the native library; explain the failure before propagating it.
    const digest = native_hash.hashDirectory(allocator, path) catch |err| {
        switch (err) {
            error.OutOfMemory => colors.printError("Out of memory\n", .{}),
            error.InvalidPath => colors.printError("Invalid path: {s}\n", .{path}),
            error.HashFailed => colors.printError("Hash computation failed\n", .{}),
            error.ContextInitFailed => colors.printError("Failed to initialize native hash context\n", .{}),
        }
        return err;
    };
    defer allocator.free(digest);

    colors.printSuccess("SHA256: {s}\n", .{digest});
}
fn writeJSONString(writer: anytype, s: []const u8) !void {
try writer.writeByte('"');
for (s) |c| {

View file

@ -0,0 +1,53 @@
const std = @import("std");
const cli = @import("../../main.zig");
const native_hash = @import("../../native/hash.zig");
const ui = @import("../../ui/ui.zig");
const colors = @import("../../ui/colors.zig");
pub const name = "dataset hash";
pub const description = "Hash a dataset directory using native SHA256 library";
/// Entry point for the `dataset hash` command.
///
/// `args[0]` is the dataset directory to hash. With no arguments the command
/// help is printed and the function returns without error. Otherwise the
/// active SHA256 implementation is reported, the directory is hashed through
/// `native_hash.hashDirectory`, and the result is printed. Hashing errors are
/// described to the user and then returned.
pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    // No path supplied: show help instead of failing.
    if (args.len == 0) {
        try ui.printHelp(name, description, &.{
            .{ "<path>", "Path to dataset directory" },
        });
        return;
    }
    const dataset_path = args[0];

    // Report whether the native library has a SIMD SHA256 implementation.
    if (native_hash.hasSimdSha256()) {
        colors.printInfo("Using {s} SHA256 implementation\n", .{native_hash.getSimdImplName()});
    } else {
        colors.printWarning("SIMD SHA256 not available, using generic implementation\n", .{});
    }

    colors.printInfo("Hashing dataset at: {s}\n", .{dataset_path});

    const digest = native_hash.hashDirectory(allocator, dataset_path) catch |err| {
        switch (err) {
            error.OutOfMemory => colors.printError("Out of memory\n", .{}),
            error.InvalidPath => colors.printError("Invalid path: {s}\n", .{dataset_path}),
            error.HashFailed => colors.printError("Hash computation failed\n", .{}),
            error.ContextInitFailed => colors.printError("Failed to initialize native hash context\n", .{}),
        }
        return err;
    };
    defer allocator.free(digest);

    colors.printSuccess("Dataset hash: {s}\n", .{digest});
}

View file

@ -51,7 +51,9 @@ pub fn execute(allocator: std.mem.Allocator, args: []const []const u8) !void {
} else if (std.mem.eql(u8, subcommand, "show")) {
return try showExperiment(allocator, sub_args, flags.json);
} else {
core.output.errorMsg("experiment", "Unknown subcommand: {s}", .{subcommand});
const msg = try std.fmt.allocPrint(allocator, "Unknown subcommand: {s}", .{subcommand});
defer allocator.free(msg);
core.output.errorMsg("experiment", msg);
return printUsage();
}
}
@ -283,7 +285,9 @@ fn showExperiment(allocator: std.mem.Allocator, args: []const []const u8, json:
try db.DB.bindText(exp_stmt, 1, exp_id);
if (!try db.DB.step(exp_stmt)) {
core.output.errorMsg("experiment", "Experiment not found: {s}", .{exp_id});
const msg = try std.fmt.allocPrint(allocator, "Experiment not found: {s}", .{exp_id});
defer allocator.free(msg);
core.output.errorMsg("experiment", msg);
return error.NotFound;
}
@ -291,7 +295,7 @@ fn showExperiment(allocator: std.mem.Allocator, args: []const []const u8, json:
const description = db.DB.columnText(exp_stmt, 2);
const created_at = db.DB.columnText(exp_stmt, 3);
const status = db.DB.columnText(exp_stmt, 4);
const synced = db.DB.columnInt(exp_stmt, 5) != 0;
const synced = db.DB.columnInt64(exp_stmt, 5) != 0;
// Get run count and last run date
const runs_sql =

58
cli/src/native/hash.zig Normal file
View file

@ -0,0 +1,58 @@
const std = @import("std");
const c = @cImport({
@cInclude("dataset_hash.h");
});
/// Errors surfaced by the native hash wrapper functions in this file.
pub const HashError = error{
/// `fh_init` returned null, so no native hash context could be obtained.
ContextInitFailed,
/// `fh_hash_directory_combined` returned null instead of a hash string.
HashFailed,
/// The supplied path is not usable.
/// NOTE(review): confirm which wrapper functions actually return this.
InvalidPath,
/// An allocation made through the caller-supplied allocator failed.
OutOfMemory,
};
/// Create a native hash context via `fh_init`.
///
/// Passes 0 as the thread count, which asks the library to auto-detect it.
/// Returns `HashError.ContextInitFailed` when the library hands back null.
/// Pair every successful call with `cleanupContext`.
pub fn initContext() !*c.fh_context_t {
    const maybe_ctx = c.fh_init(0); // 0 = auto-detect threads
    return if (maybe_ctx != null) maybe_ctx.? else HashError.ContextInitFailed;
}
/// Release a native hash context by handing it to `fh_cleanup`.
///
/// `ctx` is expected to be a pointer obtained from `initContext`.
/// NOTE(review): presumably the context must not be used after this call —
/// confirm against dataset_hash.h.
pub fn cleanupContext(ctx: *c.fh_context_t) void {
c.fh_cleanup(ctx);
}
/// Hash a directory using the native library.
///
/// Calls `fh_hash_directory_combined` with a NUL-terminated copy of `path`
/// and returns a copy of the string it produces (the hex-encoded SHA256
/// hash). The returned slice is allocated with `allocator`; the caller owns
/// it and must free it with the same allocator. The native context and the
/// native result string are released before this function returns.
///
/// Errors:
/// - `InvalidPath`: `path` is empty or contains an embedded NUL byte, which
///   the C side would read as a shorter, different path.
/// - `ContextInitFailed`: the native context could not be created.
/// - `HashFailed`: the native hash call returned null.
/// - `OutOfMemory`: copying the path or the result failed.
pub fn hashDirectory(allocator: std.mem.Allocator, path: []const u8) HashError![]const u8 {
    // Validate before touching the native library: a C string ends at the
    // first NUL, so an embedded NUL would silently hash the wrong path.
    if (path.len == 0 or std.mem.indexOfScalar(u8, path, 0) != null) {
        return HashError.InvalidPath;
    }

    const ctx = try initContext();
    defer cleanupContext(ctx);

    // Convert path to a NUL-terminated C string.
    const c_path = try allocator.dupeZ(u8, path);
    defer allocator.free(c_path);

    // Pass the underlying pointer; a Zig slice is not a C pointer.
    const result = c.fh_hash_directory_combined(ctx, c_path.ptr);
    if (result == null) {
        return HashError.HashFailed;
    }
    // The native string is released once our copy has been made.
    defer c.fh_free_string(result);

    // Copy into allocator-owned memory so the caller never holds native memory.
    return try allocator.dupe(u8, std.mem.span(result));
}
/// Report whether the native library has a SIMD SHA256 implementation.
///
/// True only when `fh_has_simd_sha256` returns exactly 1; any other value
/// is treated as "not available".
pub fn hasSimdSha256() bool {
    const simd_flag = c.fh_has_simd_sha256();
    return simd_flag == 1;
}
/// Get the name of the SIMD implementation reported by the native library.
///
/// Wraps the NUL-terminated string from `fh_get_simd_impl_name` as a slice
/// without copying, so the caller must not free it.
/// NOTE(review): presumably the string is static inside the library —
/// confirm its lifetime against dataset_hash.h.
///
/// Returns "unknown" when the library returns a null pointer.
pub fn getSimdImplName() []const u8 {
    const name = c.fh_get_simd_impl_name();
    // std.mem.span asserts a non-null C pointer, so guard before wrapping.
    if (name == null) return "unknown";
    return std.mem.span(name);
}