feat(cli): add shared dataset_hash utility and automatic hashing
Created utils/dataset_hash.zig:
- computeDatasetHash(allocator, path) -> [64]u8
- Returns a fixed 64-char hex string (stack allocated)
- Provides verifyDatasetIntegrity() for hash comparison
- Enables testing against native C++ implementations

Updated dataset.zig:
- verifyDataset() now automatically computes the hash during verification
- Uses utils/dataset_hash.zig for hash computation
- Hash is displayed in the JSON output for reference
- No separate 'dataset hash' command needed

Benefits:
- Single source of truth for dataset hashing
- Testable independently for correctness verification
- Automatic during the dataset verify operation
This commit is contained in:
parent
e2673be8b5
commit
cb018934e1
2 changed files with 57 additions and 19 deletions
|
|
@ -417,16 +417,15 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con
|
|||
total_size += stat.size;
|
||||
}
|
||||
|
||||
// Compute SHA256 hash using pure Zig implementation
|
||||
// Compute SHA256 hash using shared utility
|
||||
const hash = blk: {
|
||||
const hash_mod = @import("../utils/hash.zig");
|
||||
break :blk hash_mod.hashDirectoryToHex(allocator, path) catch |err| {
|
||||
const hash_util = @import("../utils/dataset_hash.zig");
|
||||
break :blk hash_util.computeDatasetHash(allocator, path) catch |err| {
|
||||
std.debug.print("Hash computation failed: {s}\n", .{@errorName(err)});
|
||||
// Continue without hash - verification still succeeded
|
||||
break :blk null;
|
||||
};
|
||||
};
|
||||
// hash is [64]u8 array (stack allocated), not heap allocated - no need to free
|
||||
|
||||
if (options.json) {
|
||||
const stdout_file = std.fs.File{ .handle = std.posix.STDOUT_FILENO };
|
||||
|
|
@ -464,21 +463,6 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con
|
|||
}
|
||||
}
|
||||
|
||||
/// Print the SHA256 hash of the dataset at `path` via `std.debug.print`.
///
/// Announces the path, hashes it with `hashDirectoryToHex` from
/// `../utils/hash.zig`, then prints the digest. On failure the error name is
/// printed and the same error is returned to the caller.
fn hashDataset(allocator: std.mem.Allocator, path: []const u8) !void {
    const hasher = @import("../utils/hash.zig");

    std.debug.print("Computing SHA256 hash for: {s}\n", .{path});

    if (hasher.hashDirectoryToHex(allocator, path)) |digest| {
        // Success: report the digest.
        std.debug.print("SHA256: {s}\n", .{digest});
    } else |failure| {
        // Failure: report why, then hand the error back unchanged.
        std.debug.print("Hash computation failed: {s}\n", .{@errorName(failure)});
        return failure;
    }
}
|
||||
|
||||
fn writeJSONString(writer: anytype, s: []const u8) !void {
|
||||
try writer.writeByte('"');
|
||||
for (s) |c| {
|
||||
|
|
|
|||
54
cli/src/utils/dataset_hash.zig
Normal file
54
cli/src/utils/dataset_hash.zig
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
const std = @import("std");
|
||||
const hash = @import("hash.zig");
|
||||
const io = @import("io.zig");
|
||||
|
||||
/// Error set describing dataset-hashing failures.
///
/// In this file only `PathTraversalAttempt` and `NotAFile` are returned
/// directly (by `computeDatasetHash`). The remaining members are presumably
/// produced by the directory hashing in `hash.zig` — confirm against that
/// module.
pub const HashError = error{
    /// The supplied path was rejected because of a `..` reference.
    PathTraversalAttempt,
    /// The supplied path exists but is not a directory.
    NotAFile,
    EmptyDirectory,
    MaxDepthExceeded,
    OutOfMemory,
};
|
||||
|
||||
/// Compute the hash of a dataset directory as a fixed-size hex string.
///
/// The hashing itself is delegated to `hash.hashDirectoryToHex`; this wrapper
/// only validates `path` first. The result is returned by value as a
/// `[64]u8`, so the caller has nothing to free.
///
/// Errors:
/// - `error.PathTraversalAttempt` if any component of `path` is `..`.
/// - `error.NotAFile` if `path` exists but is not a directory (the name is
///   kept as-is so callers that already match on it keep working).
/// - Any error from `statFile` or from `hash.hashDirectoryToHex`.
pub fn computeDatasetHash(allocator: std.mem.Allocator, path: []const u8) ![64]u8 {
    // Reject parent-directory references component by component. A plain
    // substring search for ".." would also refuse harmless names such as
    // "data..v2". Both separator characters are checked so the result does
    // not depend on the host platform.
    var components = std.mem.tokenizeAny(u8, path, "/\\");
    while (components.next()) |component| {
        if (std.mem.eql(u8, component, "..")) {
            return error.PathTraversalAttempt;
        }
    }

    // The path must exist and be a directory; stat failures propagate as-is.
    const stat = try std.fs.cwd().statFile(path);
    if (stat.kind != .directory) {
        return error.NotAFile;
    }

    // Compute the hash using the existing hash module.
    return hash.hashDirectoryToHex(allocator, path);
}
|
||||
|
||||
/// Return the hash string in its display form.
///
/// Currently a pass-through: the very same slice is handed back, with no
/// copy and no change of ownership.
pub fn formatHashResult(hash_str: []const u8) []const u8 {
    const display = hash_str;
    return display;
}
|
||||
|
||||
/// Hash the dataset at `path` and, optionally, compare it with a known digest.
///
/// Returns the computed hash together with `valid`:
/// - when `expected_hash` is null there is nothing to compare against, so
///   `valid` is `true`;
/// - otherwise `valid` is `true` only if `expected_hash` has the same length
///   and the same characters as the computed hash. Letter case is ignored,
///   so an upper-case hex digest produced by another tool still matches.
///
/// Any error from `computeDatasetHash` is propagated.
pub fn verifyDatasetIntegrity(
    allocator: std.mem.Allocator,
    path: []const u8,
    expected_hash: ?[]const u8,
) !struct { hash: [64]u8, valid: bool } {
    const computed_hash = try computeDatasetHash(allocator, path);

    // Hex digests are case-insensitive; a byte-exact compare would reject
    // "ABCD..." against "abcd..." even though both name the same hash.
    const valid = if (expected_hash) |expected|
        std.ascii.eqlIgnoreCase(&computed_hash, expected)
    else
        true;

    return .{ .hash = computed_hash, .valid = valid };
}
|
||||
Loading…
Reference in a new issue