diff --git a/cli/src/commands/dataset.zig b/cli/src/commands/dataset.zig index e1a3e06..d6e57de 100644 --- a/cli/src/commands/dataset.zig +++ b/cli/src/commands/dataset.zig @@ -417,16 +417,15 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con total_size += stat.size; } - // Compute SHA256 hash using pure Zig implementation + // Compute SHA256 hash using shared utility const hash = blk: { - const hash_mod = @import("../utils/hash.zig"); - break :blk hash_mod.hashDirectoryToHex(allocator, path) catch |err| { + const hash_util = @import("../utils/dataset_hash.zig"); + break :blk hash_util.computeDatasetHash(allocator, path) catch |err| { std.debug.print("Hash computation failed: {s}\n", .{@errorName(err)}); // Continue without hash - verification still succeeded break :blk null; }; }; - // hash is [64]u8 array (stack allocated), not heap allocated - no need to free if (options.json) { const stdout_file = std.fs.File{ .handle = std.posix.STDOUT_FILENO }; @@ -464,21 +463,6 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con } } -fn hashDataset(allocator: std.mem.Allocator, path: []const u8) !void { - std.debug.print("Computing SHA256 hash for: {s}\n", .{path}); - - const hash_mod = @import("../utils/hash.zig"); - - // Compute hash using pure Zig implementation - const hash = hash_mod.hashDirectoryToHex(allocator, path) catch |err| { - std.debug.print("Hash computation failed: {s}\n", .{@errorName(err)}); - return err; - }; - - // Print result - std.debug.print("SHA256: {s}\n", .{hash}); -} - fn writeJSONString(writer: anytype, s: []const u8) !void { try writer.writeByte('"'); for (s) |c| { diff --git a/cli/src/utils/dataset_hash.zig b/cli/src/utils/dataset_hash.zig new file mode 100644 index 0000000..67bebca --- /dev/null +++ b/cli/src/utils/dataset_hash.zig @@ -0,0 +1,54 @@ +const std = @import("std"); +const hash = @import("hash.zig"); +const io = @import("io.zig"); + +/// Errors that can occur during 
/// dataset hashing.
///
/// `computeDatasetHash` declares an inferred error set, so in addition to the
/// members used from this set it propagates whatever `std.fs.Dir.statFile` and
/// `hash.hashDirectoryToHex` return.
pub const HashError = error{
    PathTraversalAttempt,
    NotAFile,
    EmptyDirectory,
    MaxDepthExceeded,
    OutOfMemory,
};

/// Report whether any component of `path` is exactly `..`.
///
/// Both `/` and `\` are treated as separators so the check stays conservative
/// regardless of platform. A name that merely contains two dots
/// (e.g. `data..v2`) is not a traversal and is accepted.
fn hasParentDirComponent(path: []const u8) bool {
    var components = std.mem.tokenizeAny(u8, path, "/\\");
    while (components.next()) |component| {
        if (std.mem.eql(u8, component, "..")) return true;
    }
    return false;
}

/// Compute the SHA256 hash of a dataset directory.
///
/// `path` is looked up relative to the current working directory. Paths with a
/// `..` component are rejected before the filesystem is touched.
///
/// Returns the digest as a 64-character hex string, by value (nothing to free).
///
/// Errors:
/// - `error.PathTraversalAttempt` if `path` contains a `..` component.
/// - `error.NotAFile` if `path` exists but is not a directory (the name is kept
///   as-is so existing callers matching on it keep working).
/// - Any error from `statFile` or `hash.hashDirectoryToHex`, unchanged.
pub fn computeDatasetHash(allocator: std.mem.Allocator, path: []const u8) ![64]u8 {
    // Validate path (prevent traversal) on whole components, not substrings.
    if (hasParentDirComponent(path)) return error.PathTraversalAttempt;

    // Check that the path exists and is a directory.
    const stat = try std.fs.cwd().statFile(path);
    if (stat.kind != .directory) return error.NotAFile;

    // Delegate the actual hashing to the existing hash module.
    return hash.hashDirectoryToHex(allocator, path);
}

/// Format a hash result for display.
///
/// Currently an identity function: returns `hash_str` unchanged, with no copy
/// and no allocation, so the result aliases the caller's slice.
pub fn formatHashResult(hash_str: []const u8) []const u8 {
    return hash_str;
}

/// Verify dataset integrity by comparing hashes.
///
/// Computes the hash of the directory at `path` via `computeDatasetHash` and
/// compares it with `expected_hash`. Hex digits are compared case-insensitively,
/// so an upper-case expected digest matches. When `expected_hash` is null there
/// is nothing to compare against and `valid` is reported as `true`.
///
/// Returns the computed hash together with the comparison result. Errors from
/// `computeDatasetHash` are propagated.
pub fn verifyDatasetIntegrity(
    allocator: std.mem.Allocator,
    path: []const u8,
    expected_hash: ?[]const u8,
) !struct { hash: [64]u8, valid: bool } {
    const computed_hash = try computeDatasetHash(allocator, path);

    const valid = if (expected_hash) |expected|
        std.ascii.eqlIgnoreCase(&computed_hash, expected)
    else
        true;

    return .{ .hash = computed_hash, .valid = valid };
}