refactor: make dataset hash automatic in verify command

- Remove separate 'hash' subcommand
- Integrate native SHA256 hash into 'dataset verify'
- Compute the hash automatically when verifying datasets
- Show the hash in all output formats (JSON, CSV, and text)
- Update help text to indicate auto-hashing
This commit is contained in:
Jeremie Fraeys 2026-02-21 14:09:44 -05:00
parent 1a35c54300
commit 25ae791b5c
No known key found for this signature in database

View file

@ -77,9 +77,6 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
const options = DatasetOptions{ .json = flags.json, .validate = validate };
try verifyDataset(allocator, positional.items[1], &options);
return;
} else if (std.mem.eql(u8, action, "hash")) {
try hashDataset(allocator, positional.items[1]);
return;
}
},
3 => {
@ -105,8 +102,7 @@ fn printUsage() void {
colors.printInfo(" register <name> <url> Register a dataset with URL\n", .{});
colors.printInfo(" info <name> Show dataset information\n", .{});
colors.printInfo(" search <term> Search datasets by name/description\n", .{});
colors.printInfo(" verify <path|id> Verify dataset integrity\n", .{});
colors.printInfo(" hash <path> Compute native SHA256 hash\n", .{});
colors.printInfo(" verify <path|id> Verify dataset integrity (auto-hashes)\n", .{});
colors.printInfo("\nOptions:\n", .{});
colors.printInfo(" --dry-run Show what would be requested\n", .{});
colors.printInfo(" --validate Validate inputs only (no request)\n", .{});
@ -386,9 +382,6 @@ fn searchDatasets(allocator: std.mem.Allocator, term: []const u8, options: *cons
fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *const DatasetOptions) !void {
colors.printInfo("Verifying dataset: {s}\n", .{target});
// For now, use basic stat to check if path exists
// In production, this would compute SHA256 hashes
const path = if (std.fs.path.isAbsolute(target))
target
else
@ -418,11 +411,21 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con
total_size += stat.size;
}
// Compute native SHA256 hash
const hash = native_hash.hashDirectory(allocator, path) catch |err| {
colors.printWarning("Hash computation failed: {s}\n", .{@errorName(err)});
// Continue without hash - verification still succeeded
const hash_str: ?[]const u8 = null;
_ = hash_str;
};
defer if (hash) |h| allocator.free(h);
if (options.json) {
const stdout_file = std.fs.File{ .handle = std.posix.STDOUT_FILENO };
var buffer: [4096]u8 = undefined;
const formatted = std.fmt.bufPrint(&buffer, "{{\"path\":\"{s}\",\"files\":{d},\"size\":{d},\"ok\":true}}\n", .{
target, file_count, total_size,
const hash_str = if (hash) |h| h else "null";
const formatted = std.fmt.bufPrint(&buffer, "{{\"path\":\"{s}\",\"files\":{d},\"size\":{d},\"hash\":\"{s}\",\"ok\":true}}\n", .{
target, file_count, total_size, hash_str,
}) catch unreachable;
try stdout_file.writeAll(formatted);
} else if (options.csv) {
@ -437,11 +440,18 @@ fn verifyDataset(allocator: std.mem.Allocator, target: []const u8, options: *con
try stdout_file.writeAll(line3);
const line4 = try std.fmt.bufPrint(&buf, "size_mb,{d:.2}\n", .{@as(f64, @floatFromInt(total_size)) / (1024 * 1024)});
try stdout_file.writeAll(line4);
if (hash) |h| {
const line5 = try std.fmt.bufPrint(&buf, "sha256,{s}\n", .{h});
try stdout_file.writeAll(line5);
}
} else {
colors.printSuccess("✓ Dataset verified\n", .{});
colors.printInfo(" Path: {s}\n", .{target});
colors.printInfo(" Files: {d}\n", .{file_count});
colors.printInfo(" Size: {d:.2} MB\n", .{@as(f64, @floatFromInt(total_size)) / (1024 * 1024)});
if (hash) |h| {
colors.printInfo(" SHA256: {s}\n", .{h});
}
}
}