fetch_ml/cli/src/commands/run.zig
Jeremie Fraeys e2673be8b5
feat(cli): unify exec, queue, run into single 'run' command
Since app is not released, removed old commands entirely:
- Deleted exec.zig (533 lines) - modularized version
- Deleted queue.zig (1248 lines) - complete removal
- Unified all functionality into run.zig

New unified 'ml run' command features:
- Auto-detects local vs remote execution via mode.detect()
- Supports --local and --remote flags to force execution mode
- Includes all resource options: --cpu, --memory, --gpu
- Research context: --hypothesis, --context, --intent, --tags
- Validation modes: --dry-run, --validate, --explain
- Uses modular exec/remote.zig and exec/local.zig for execution

Dispatcher updates (main.zig):
- Removed 'e' (exec) handler
- Removed 'q' (queue) handler
- Updated help text to show unified command

Import cleanup (commands.zig):
- Removed queue.zig import

Total code reduction: ~1,700 lines
All tests pass.
2026-03-05 10:57:00 -05:00

219 lines
8.3 KiB
Zig

const std = @import("std");
const core = @import("../core.zig");
const config = @import("../config.zig");
const mode = @import("../mode.zig");
const common = @import("common.zig");
const remote = @import("exec/remote.zig");
const local = @import("exec/local.zig");
/// Where a run is executed.
pub const RunMode = enum {
    /// Execute on this machine via `exec/local.zig`.
    local,
    /// Hand the job to `exec/remote.zig`.
    remote,
};
/// Options collected from the `ml run` command line.
///
/// String fields are borrowed slices of the argument vector; this struct
/// does not own them.
pub const RunOptions = struct {
    // --- Resource requests -------------------------------------------------
    /// CPU cores requested (`--cpu`).
    cpu: u8 = 1,
    /// Memory requested in GB (`--memory`).
    memory: u8 = 4,
    /// GPU devices requested (`--gpu`).
    gpu: u8 = 0,
    /// Raw value given to `--gpu-memory`; stored unparsed.
    gpu_memory: ?[]const u8 = null,
    /// Job priority (`--priority`).
    priority: u8 = 5,

    // --- Mode switches -----------------------------------------------------
    /// `--dry-run`: report what would happen instead of running.
    dry_run: bool = false,
    /// `--validate`: check the job without running it.
    validate: bool = false,
    /// `--explain`: print a description of the job without running it.
    explain: bool = false,
    /// `--force`: stored here and handed to the executor; not read in this file.
    force: bool = false,

    // --- Research context --------------------------------------------------
    /// `--hypothesis` text.
    hypothesis: ?[]const u8 = null,
    /// `--context` text.
    context: ?[]const u8 = null,
    /// `--intent` text.
    intent: ?[]const u8 = null,
    /// `--expected-outcome` text.
    expected_outcome: ?[]const u8 = null,
    /// `--tags` value, kept as the raw comma-separated string.
    tags: ?[]const u8 = null,
};
/// Returns the value that follows the flag at `pre[i.*]` and advances `i.*`
/// onto that value, so the caller's loop increment steps past it.
///
/// When the flag is the last token, prints a diagnostic naming the flag and
/// returns `error.InvalidArgs` rather than letting the flag be dropped.
fn takeFlagValue(pre: []const []const u8, i: *usize) error{InvalidArgs}![]const u8 {
    const next = i.* + 1;
    if (next >= pre.len) {
        std.debug.print("Error: {s} requires a value\n", .{pre[i.*]});
        return error.InvalidArgs;
    }
    i.* = next;
    return pre[next];
}

/// Unified run command - transparently handles local and remote execution.
///
/// `args` holds the tokens of the `run` command. Tokens before the first
/// `--` are parsed as options plus one positional job name; tokens after it
/// are joined with single spaces and forwarded to the executor unchanged.
///
/// Mode selection: `--local` wins over `--remote` when both are given; with
/// neither, `mode.detect` decides. `--dry-run`, `--validate` and `--explain`
/// (checked in that order) short-circuit before anything is executed.
///
/// Errors:
/// - `error.InvalidArgs` when no job name is given or a value-taking flag
///   has no value.
/// - `std.fmt.parseInt` errors for non-numeric or out-of-range numeric values.
/// - Anything propagated from config loading, mode detection or the executors.
pub fn execute(allocator: std.mem.Allocator, args: []const []const u8) !void {
    var flags = core.flags.CommonFlags{};
    var force_local = false;
    var force_remote = false;

    // Everything before the first "--" is ours; everything after is forwarded.
    const sep_index: ?usize = for (args, 0..) |a, idx| {
        if (std.mem.eql(u8, a, "--")) break idx;
    } else null;
    const pre = args[0..(sep_index orelse args.len)];

    // Parse options
    var job_name: ?[]const u8 = null;
    var options = RunOptions{};
    var i: usize = 0;
    while (i < pre.len) : (i += 1) {
        const arg = pre[i];
        if (std.mem.eql(u8, arg, "--help") or std.mem.eql(u8, arg, "-h")) {
            try printUsage();
            return;
        } else if (std.mem.eql(u8, arg, "--json")) {
            flags.json = true;
        } else if (std.mem.eql(u8, arg, "--priority")) {
            options.priority = try std.fmt.parseInt(u8, try takeFlagValue(pre, &i), 10);
        } else if (std.mem.eql(u8, arg, "--cpu")) {
            options.cpu = try std.fmt.parseInt(u8, try takeFlagValue(pre, &i), 10);
        } else if (std.mem.eql(u8, arg, "--memory")) {
            options.memory = try std.fmt.parseInt(u8, try takeFlagValue(pre, &i), 10);
        } else if (std.mem.eql(u8, arg, "--gpu")) {
            options.gpu = try std.fmt.parseInt(u8, try takeFlagValue(pre, &i), 10);
        } else if (std.mem.eql(u8, arg, "--gpu-memory")) {
            options.gpu_memory = try takeFlagValue(pre, &i);
        } else if (std.mem.eql(u8, arg, "--dry-run")) {
            options.dry_run = true;
        } else if (std.mem.eql(u8, arg, "--validate")) {
            options.validate = true;
        } else if (std.mem.eql(u8, arg, "--explain")) {
            options.explain = true;
        } else if (std.mem.eql(u8, arg, "--local")) {
            force_local = true;
        } else if (std.mem.eql(u8, arg, "--remote")) {
            force_remote = true;
        } else if (std.mem.eql(u8, arg, "--force")) {
            options.force = true;
        } else if (std.mem.eql(u8, arg, "--hypothesis")) {
            options.hypothesis = try takeFlagValue(pre, &i);
        } else if (std.mem.eql(u8, arg, "--context")) {
            options.context = try takeFlagValue(pre, &i);
        } else if (std.mem.eql(u8, arg, "--intent")) {
            options.intent = try takeFlagValue(pre, &i);
        } else if (std.mem.eql(u8, arg, "--expected-outcome")) {
            options.expected_outcome = try takeFlagValue(pre, &i);
        } else if (std.mem.eql(u8, arg, "--tags")) {
            options.tags = try takeFlagValue(pre, &i);
        } else if (!std.mem.startsWith(u8, arg, "-")) {
            // The first positional token is the job name; later ones are ignored.
            if (job_name == null) job_name = arg;
        }
        // Unrecognised "-"-prefixed tokens are deliberately left ignored.
    }

    const name = job_name orelse {
        try printUsage();
        return error.InvalidArgs;
    };

    // Build the forwarded argument string. `joined_args` is non-null exactly
    // when we allocated, which makes the cleanup condition unambiguous.
    var joined_args: ?[]u8 = null;
    defer if (joined_args) |owned| allocator.free(owned);
    if (sep_index) |si| {
        const post = args[(si + 1)..];
        if (post.len > 0) joined_args = try std.mem.join(allocator, " ", post);
    }
    const args_str: []const u8 = joined_args orelse "";

    const cfg = try config.Config.load(allocator);
    defer {
        // `cfg` is const; deinit goes through a mutable copy.
        var mut = cfg;
        mut.deinit(allocator);
    }

    // Determine execution mode
    const run_mode: RunMode = if (force_local)
        .local
    else if (force_remote)
        .remote
    else blk: {
        const detected = try mode.detect(allocator, cfg);
        const chosen: RunMode = if (mode.isOnline(detected.mode)) .remote else .local;
        if (detected.warning) |warn| {
            std.log.info("{s}", .{warn});
        }
        break :blk chosen;
    };

    // Handle special modes
    if (options.dry_run) {
        return try common.dryRun(allocator, name, run_mode, &options, args_str);
    }
    if (options.validate) {
        return try validateJob(allocator, name, &options);
    }
    if (options.explain) {
        return try explainJob(allocator, name, &options);
    }

    // Execute
    switch (run_mode) {
        .remote => {
            try remote.execute(allocator, name, options.priority, &options, args_str, cfg);
        },
        .local => {
            // NOTE(review): ownership of `run_id` is not visible from this
            // file - confirm whether the caller is expected to free it.
            const run_id = try local.execute(allocator, name, &options, args_str, cfg);
            try local.markForSync(allocator, run_id);
            if (!flags.json) {
                // Only the first 8 characters of the id are shown.
                std.debug.print("\nRun completed locally (run_id: {s})\n", .{run_id[0..@min(8, run_id.len)]});
                std.debug.print("Will sync to server when connection is available\n", .{});
            }
        },
    }
}
/// Checks that `train.py` and `requirements.txt` are accessible in the
/// current working directory and prints a per-file report for `job_name`.
///
/// The result is only printed: the function returns normally whether or not
/// validation passed. `allocator` and `options` are unused.
fn validateJob(allocator: std.mem.Allocator, job_name: []const u8, options: *const RunOptions) !void {
    _ = allocator;
    _ = options;

    // Any access error (missing file, permissions, ...) counts as "not present".
    const has_script = script: {
        std.fs.cwd().access("train.py", .{}) catch break :script false;
        break :script true;
    };
    const has_requirements = reqs: {
        std.fs.cwd().access("requirements.txt", .{}) catch break :reqs false;
        break :reqs true;
    };

    // Indexed by @intFromBool: false -> "no", true -> "yes".
    const yes_no = [2][]const u8{ "no", "yes" };
    const verdict: []const u8 = if (has_script and has_requirements)
        "✓ Validation passed"
    else
        "✗ Validation failed";

    std.debug.print("Validation Results for '{s}':\n", .{job_name});
    std.debug.print(" train.py: {s}\n", .{yes_no[@intFromBool(has_script)]});
    std.debug.print(" requirements.txt: {s}\n", .{yes_no[@intFromBool(has_requirements)]});
    std.debug.print("\n{s}\n", .{verdict});
}
/// Prints a short description of what running `job_name` would request:
/// the CPU, memory and GPU values from `options`, plus the hypothesis when
/// one was supplied. Nothing is executed. `allocator` is unused.
fn explainJob(allocator: std.mem.Allocator, job_name: []const u8, options: *const RunOptions) !void {
    _ = allocator;

    const opts = options.*;
    std.debug.print("Job Explanation for '{s}':\n", .{job_name});
    std.debug.print(
        " CPU: {d}, Memory: {d}GB, GPU: {d}\n",
        .{ opts.cpu, opts.memory, opts.gpu },
    );
    if (opts.hypothesis) |text| {
        std.debug.print(" Hypothesis: {s}\n", .{text});
    }
    std.debug.print("\n Action: Would execute\n", .{});
}
/// Prints the `ml run` help text to stderr.
///
/// Lists every option the argument parser in `execute` recognises. The
/// error-union return type is kept so existing `try printUsage()` call sites
/// continue to compile; the function itself does not fail.
fn printUsage() !void {
    // A trailing empty `\\` line supplies the final newline.
    const usage_text =
        \\Usage: ml run <job_name> [options] [-- <args>]
        \\
        \\Unified run command - handles both local and remote execution.
        \\
        \\Options:
        \\ --priority <1-10> Job priority (default: 5)
        \\ --cpu <n> CPU cores requested (default: 1)
        \\ --memory <n> Memory GB requested (default: 4)
        \\ --gpu <n> GPU devices requested (default: 0)
        \\ --gpu-memory <value> GPU memory requested
        \\ --local Force local execution
        \\ --remote Force remote execution
        \\ --dry-run Show what would happen
        \\ --validate Validate job without running
        \\ --explain Explain what will happen
        \\ --force Set the force option for the run
        \\ --json Suppress human-readable status output
        \\ --hypothesis <text> Research hypothesis
        \\ --context <text> Background information
        \\ --intent <text> Research intent
        \\ --expected-outcome <text> Expected outcome
        \\ --tags <csv> Comma-separated tags
        \\ -h, --help Show this help
        \\
    ;
    std.debug.print("{s}", .{usage_text});
}