feat: integrate native library into Zig CLI
- Add cli/src/native/hash.zig - C ABI wrapper for dataset_hash
- Update cli/src/commands/dataset.zig - Add 'hash' subcommand
- Update cli/build.zig - Link against libdataset_hash.so
- Fix pre-existing CLI errors in experiment.zig (errorMsg signatures, columnInt64)

Usage: ml dataset hash <path>

Note: Additional pre-existing CLI errors remain in sync.zig
This commit is contained in:
parent
4b2ee75072
commit
1a35c54300
4 changed files with 158 additions and 3 deletions
|
|
@ -5,6 +5,7 @@ const colors = @import("../utils/colors.zig");
|
|||
const logging = @import("../utils/logging.zig");
|
||||
const crypto = @import("../utils/crypto.zig");
|
||||
const core = @import("../core.zig");
|
||||
const native_hash = @import("../native/hash.zig");
|
||||
|
||||
const DatasetOptions = struct {
|
||||
dry_run: bool = false,
|
||||
|
|
@ -76,6 +77,9 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
|
|||
const options = DatasetOptions{ .json = flags.json, .validate = validate };
|
||||
try verifyDataset(allocator, positional.items[1], &options);
|
||||
return;
|
||||
} else if (std.mem.eql(u8, action, "hash")) {
|
||||
try hashDataset(allocator, positional.items[1]);
|
||||
return;
|
||||
}
|
||||
},
|
||||
3 => {
|
||||
|
|
@ -102,6 +106,7 @@ fn printUsage() void {
|
|||
colors.printInfo(" info <name> Show dataset information\n", .{});
|
||||
colors.printInfo(" search <term> Search datasets by name/description\n", .{});
|
||||
colors.printInfo(" verify <path|id> Verify dataset integrity\n", .{});
|
||||
colors.printInfo(" hash <path> Compute native SHA256 hash\n", .{});
|
||||
colors.printInfo("\nOptions:\n", .{});
|
||||
colors.printInfo(" --dry-run Show what would be requested\n", .{});
|
||||
colors.printInfo(" --validate Validate inputs only (no request)\n", .{});
|
||||
|
|
@ -440,6 +445,41 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con
|
|||
}
|
||||
}
|
||||
|
||||
/// Compute and print the native SHA256 hash of the dataset at `path`.
///
/// Reports which SHA256 implementation is in use, then calls
/// `native_hash.hashDirectory`. On failure a message specific to the error is
/// printed and the same error is returned to the caller. The hash string is
/// allocated with `allocator` and freed before this function returns.
fn hashDataset(allocator: std.mem.Allocator, path: []const u8) !void {
    colors.printInfo("Computing native SHA256 hash for: {s}\n", .{path});

    // Tell the user which SHA256 code path is in use.
    if (native_hash.hasSimdSha256()) {
        colors.printInfo("Using {s} SHA256 implementation\n", .{native_hash.getSimdImplName()});
    } else {
        colors.printWarning("SIMD SHA256 not available, using generic implementation\n", .{});
    }

    // Delegate the hashing to the native library; explain any failure
    // before propagating it unchanged.
    const digest = native_hash.hashDirectory(allocator, path) catch |failure| {
        switch (failure) {
            error.ContextInitFailed => colors.printError("Failed to initialize native hash context\n", .{}),
            error.HashFailed => colors.printError("Hash computation failed\n", .{}),
            error.InvalidPath => colors.printError("Invalid path: {s}\n", .{path}),
            error.OutOfMemory => colors.printError("Out of memory\n", .{}),
        }
        return failure;
    };
    defer allocator.free(digest);

    colors.printSuccess("SHA256: {s}\n", .{digest});
}
|
||||
|
||||
fn writeJSONString(writer: anytype, s: []const u8) !void {
|
||||
try writer.writeByte('"');
|
||||
for (s) |c| {
|
||||
|
|
|
|||
53
cli/src/commands/dataset_hash.zig
Normal file
53
cli/src/commands/dataset_hash.zig
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
const std = @import("std");
|
||||
const cli = @import("../../main.zig");
|
||||
const native_hash = @import("../../native/hash.zig");
|
||||
const ui = @import("../../ui/ui.zig");
|
||||
const colors = @import("../../ui/colors.zig");
|
||||
|
||||
pub const name = "dataset hash";
|
||||
pub const description = "Hash a dataset directory using native SHA256 library";
|
||||
|
||||
/// Entry point for the `dataset hash` command.
///
/// `args[0]` is the dataset directory to hash. With no arguments the command
/// help is printed and the function returns without error. Otherwise the
/// active SHA256 implementation is reported, the directory is hashed through
/// `native_hash.hashDirectory`, and the resulting hash is printed. Any hashing
/// error is described to the user and then returned.
///
/// NOTE(review): this follows the same steps as `hashDataset` in dataset.zig -
/// confirm whether both entry points are meant to exist.
pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    // No path supplied: show usage instead of failing.
    if (args.len == 0) {
        try ui.printHelp(name, description, &.{
            .{ "<path>", "Path to dataset directory" },
        });
        return;
    }
    const dataset_path = args[0];

    // Report which SHA256 implementation the native library will use.
    if (native_hash.hasSimdSha256()) {
        colors.printInfo("Using {s} SHA256 implementation\n", .{native_hash.getSimdImplName()});
    } else {
        colors.printWarning("SIMD SHA256 not available, using generic implementation\n", .{});
    }

    colors.printInfo("Hashing dataset at: {s}\n", .{dataset_path});

    // Hash the directory; on failure print a targeted message and propagate.
    const digest = native_hash.hashDirectory(allocator, dataset_path) catch |failure| {
        switch (failure) {
            error.ContextInitFailed => colors.printError("Failed to initialize native hash context\n", .{}),
            error.HashFailed => colors.printError("Hash computation failed\n", .{}),
            error.InvalidPath => colors.printError("Invalid path: {s}\n", .{dataset_path}),
            error.OutOfMemory => colors.printError("Out of memory\n", .{}),
        }
        return failure;
    };
    defer allocator.free(digest);

    colors.printSuccess("Dataset hash: {s}\n", .{digest});
}
|
||||
|
|
@ -51,7 +51,9 @@ pub fn execute(allocator: std.mem.Allocator, args: []const []const u8) !void {
|
|||
} else if (std.mem.eql(u8, subcommand, "show")) {
|
||||
return try showExperiment(allocator, sub_args, flags.json);
|
||||
} else {
|
||||
core.output.errorMsg("experiment", "Unknown subcommand: {s}", .{subcommand});
|
||||
const msg = try std.fmt.allocPrint(allocator, "Unknown subcommand: {s}", .{subcommand});
|
||||
defer allocator.free(msg);
|
||||
core.output.errorMsg("experiment", msg);
|
||||
return printUsage();
|
||||
}
|
||||
}
|
||||
|
|
@ -283,7 +285,9 @@ fn showExperiment(allocator: std.mem.Allocator, args: []const []const u8, json:
|
|||
try db.DB.bindText(exp_stmt, 1, exp_id);
|
||||
|
||||
if (!try db.DB.step(exp_stmt)) {
|
||||
core.output.errorMsg("experiment", "Experiment not found: {s}", .{exp_id});
|
||||
const msg = try std.fmt.allocPrint(allocator, "Experiment not found: {s}", .{exp_id});
|
||||
defer allocator.free(msg);
|
||||
core.output.errorMsg("experiment", msg);
|
||||
return error.NotFound;
|
||||
}
|
||||
|
||||
|
|
@ -291,7 +295,7 @@ fn showExperiment(allocator: std.mem.Allocator, args: []const []const u8, json:
|
|||
const description = db.DB.columnText(exp_stmt, 2);
|
||||
const created_at = db.DB.columnText(exp_stmt, 3);
|
||||
const status = db.DB.columnText(exp_stmt, 4);
|
||||
const synced = db.DB.columnInt(exp_stmt, 5) != 0;
|
||||
const synced = db.DB.columnInt64(exp_stmt, 5) != 0;
|
||||
|
||||
// Get run count and last run date
|
||||
const runs_sql =
|
||||
|
|
|
|||
58
cli/src/native/hash.zig
Normal file
58
cli/src/native/hash.zig
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
const std = @import("std");
|
||||
const c = @cImport({
|
||||
@cInclude("dataset_hash.h");
|
||||
});
|
||||
|
||||
/// Errors reported by the native hash wrapper.
pub const HashError = error{
    /// `fh_init` returned null, so no native context could be created.
    ContextInitFailed,
    /// `fh_hash_directory_combined` returned null instead of a hash string.
    HashFailed,
    /// The supplied path was rejected; CLI callers report it as "Invalid path".
    InvalidPath,
    /// An allocation needed to copy the path or the result failed.
    OutOfMemory,
};
|
||||
|
||||
/// Create a native hash context through `fh_init`.
///
/// Returns `HashError.ContextInitFailed` when `fh_init` yields null. The
/// returned context should be released with `cleanupContext`.
pub fn initContext() !*c.fh_context_t {
    // 0 = auto-detect threads
    const ctx = c.fh_init(0) orelse return HashError.ContextInitFailed;
    return ctx;
}
|
||||
|
||||
/// Release a native hash context by handing it to `fh_cleanup`.
///
/// `ctx` is a context previously obtained from `initContext`.
pub fn cleanupContext(ctx: *c.fh_context_t) void {
    c.fh_cleanup(ctx);
}
|
||||
|
||||
/// Hash a directory using the native library.
///
/// Returns the hex-encoded SHA256 hash string produced by
/// `fh_hash_directory_combined`, copied into memory owned by `allocator`.
/// The caller owns the returned slice and must release it with
/// `allocator.free`.
///
/// Errors:
///   - `InvalidPath`: `path` is empty or contains an embedded NUL byte, so it
///     cannot be represented faithfully as a C string.
///   - `ContextInitFailed`: the native context could not be created.
///   - `HashFailed`: the native library returned null instead of a hash.
///   - `OutOfMemory`: copying the path or the result failed.
pub fn hashDirectory(allocator: std.mem.Allocator, path: []const u8) HashError![]const u8 {
    // The C API takes a NUL-terminated string: an embedded NUL would silently
    // truncate the path and hash a different directory, and an empty path
    // names nothing. Reject both before touching the native library.
    if (path.len == 0 or std.mem.indexOfScalar(u8, path, 0) != null) {
        return HashError.InvalidPath;
    }

    const ctx = try initContext();
    defer cleanupContext(ctx);

    // Convert path to a NUL-terminated C string.
    const c_path = try allocator.dupeZ(u8, path);
    defer allocator.free(c_path);

    // Slices do not coerce to C pointers; pass the underlying pointer.
    const result = c.fh_hash_directory_combined(ctx, c_path.ptr);
    if (result == null) {
        return HashError.HashFailed;
    }
    // The native string is released once it has been copied below.
    defer c.fh_free_string(result);

    // Copy the result into allocator-owned memory for the caller.
    return try allocator.dupe(u8, std.mem.span(result));
}
|
||||
|
||||
/// Report whether the native library has a SIMD SHA256 implementation.
///
/// True exactly when `fh_has_simd_sha256` returns 1.
pub fn hasSimdSha256() bool {
    const simd_flag = c.fh_has_simd_sha256();
    return simd_flag == 1;
}
|
||||
|
||||
/// Get the name of the SIMD implementation being used.
///
/// Wraps the C string returned by `fh_get_simd_impl_name` as a slice without
/// copying it. Returns "unknown" if the native library returns a null
/// pointer, since `std.mem.span` must not be given null.
///
/// NOTE(review): the returned slice borrows the native library's memory;
/// presumably that string is static - confirm against dataset_hash.h.
pub fn getSimdImplName() []const u8 {
    const name = c.fh_get_simd_impl_name();
    if (name == null) return "unknown";
    return std.mem.span(name);
}
|
||||
Loading…
Reference in a new issue