New commands and modules: - Add workers.zig command for worker management and status - Add scheduler_client.zig for scheduler hub communication - Add pii.zig utility for PII detection and redaction in logs/outputs Improvements to existing commands: - groups.zig: enhanced group management with capability metadata - jupyter/mod.zig: improved Jupyter workspace lifecycle handling - tasks.zig: better task status reporting and cancellation support Networking and sync improvements: - ws/client.zig: WebSocket client enhancements for hub protocol - sync_manager.zig: improved sync with scheduler state and conflict resolution - uuid.zig: optimized UUID generation for macOS and Linux Database utilities: - sqlite_embedded.zig: embedded SQLite for CLI-local state caching
193 lines
6.2 KiB
Zig
193 lines
6.2 KiB
Zig
const std = @import("std");
|
|
const db = @import("db.zig");
|
|
const manifest_lib = @import("manifest.zig");
|
|
const config = @import("config.zig");
|
|
const ws = @import("net/ws/client.zig");
|
|
const crypto = @import("utils/crypto.zig");
|
|
const protocol = @import("net/protocol.zig");
|
|
const mode = @import("mode.zig");
|
|
|
|
/// Auto-sync manager for handling offline runs.
///
/// Looks up run ids recorded as pending in the local sync database and
/// uploads each one to the server over a WebSocket connection.
pub const AutoSync = struct {
    /// Allocator used for every allocation made by this manager, including
    /// the `message` of the `SyncResult` values it returns.
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Create a manager that allocates through `allocator`.
    pub fn init(allocator: std.mem.Allocator) Self {
        return .{
            .allocator = allocator,
        };
    }

    /// Check for and sync any pending runs.
    /// Should be called periodically or when connection is restored.
    ///
    /// Returns how many runs were synced and how many failed. The `message`
    /// of the returned result is always allocated with `self.allocator`, on
    /// every return path, so it can be released with `SyncResult.deinit`.
    ///
    /// A failure while syncing one run is logged and counted in `failed`.
    /// Errors from loading the config, detecting the mode, opening the
    /// database, connecting, or marking a run as synced are propagated.
    pub fn syncPendingRuns(self: Self) !SyncResult {
        const cfg = try config.Config.load(self.allocator);
        defer {
            // `cfg` is const, so deinit is invoked on a mutable copy.
            var mut_cfg = cfg;
            mut_cfg.deinit(self.allocator);
        }

        // Check if we're online
        const mode_result = try mode.detect(self.allocator, cfg);
        if (mode.isOffline(mode_result.mode)) {
            return self.ownedResult("Offline - no sync possible");
        }

        // Get pending runs from sync DB
        const db_path = try self.getSyncDBPath();
        defer self.allocator.free(db_path);

        var database = try db.initOrOpenSyncDB(self.allocator, db_path);
        defer database.close();

        const pending = try database.getPendingRuns(self.allocator);
        defer {
            for (pending) |run_id| {
                self.allocator.free(run_id);
            }
            self.allocator.free(pending);
        }

        if (pending.len == 0) {
            return self.ownedResult("No pending runs to sync");
        }

        std.log.info("Found {d} pending run(s) to sync", .{pending.len});

        // Connect to server
        const ws_url = try cfg.getWebSocketUrl(self.allocator);
        defer self.allocator.free(ws_url);

        var client = try ws.Client.connect(self.allocator, ws_url, cfg.api_key);
        defer client.close();

        const api_key_hash = try crypto.hashApiKey(self.allocator, cfg.api_key);
        defer self.allocator.free(api_key_hash);

        var synced: usize = 0;
        var failed: usize = 0;

        for (pending) |run_id| {
            // Only the first 8 bytes of the id are logged.
            const short_id = run_id[0..@min(8, run_id.len)];

            const accepted = self.syncSingleRun(run_id, cfg, &client, api_key_hash) catch |err| {
                std.log.err("Failed to sync run {s}: {}", .{ short_id, err });
                failed += 1;
                continue;
            };

            if (!accepted) {
                failed += 1;
                continue;
            }

            // Mark as synced in database. A failure here aborts the whole
            // pass; runs already marked stay marked.
            try database.markAsSynced(run_id);
            synced += 1;
            std.log.info("Synced run {s}", .{short_id});
        }

        const msg = try std.fmt.allocPrint(self.allocator, "Synced {d}/{d} runs", .{ synced, pending.len });
        return .{
            .synced = synced,
            .failed = failed,
            .message = msg,
        };
    }

    /// Build a zero-count result whose message is a heap copy of `text`.
    ///
    /// `SyncResult.deinit` frees `message`, so a string literal must never be
    /// stored there directly.
    fn ownedResult(self: Self, text: []const u8) !SyncResult {
        return .{
            .synced = 0,
            .failed = 0,
            .message = try self.allocator.dupe(u8, text),
        };
    }

    /// Sync a single run to the server.
    ///
    /// Returns `true` when the server response contains "success". Returns
    /// `false` when no manifest is found for the run or when the response
    /// does not contain "success". Read, send and receive errors are
    /// propagated.
    fn syncSingleRun(
        self: Self,
        run_id: []const u8,
        cfg: config.Config,
        client: *ws.Client,
        api_key_hash: []const u8,
    ) !bool {
        // Find the run manifest
        const manifest_path = (try self.findManifestPath(run_id, cfg)) orelse {
            std.log.warn("Could not find manifest for run {s}", .{run_id[0..@min(8, run_id.len)]});
            return false;
        };
        defer self.allocator.free(manifest_path);

        // Read manifest
        // NOTE(review): findManifestPath returns the run *directory*, not the
        // manifest file; confirm that readManifest accepts a directory.
        var manifest = try manifest_lib.readManifest(manifest_path, self.allocator);
        defer manifest.deinit(self.allocator);

        // Send sync request to server
        try client.sendSyncRunRequest(
            run_id,
            manifest.job_name orelse "unnamed",
            manifest.command,
            manifest.status orelse "UNKNOWN",
            manifest.exit_code,
            api_key_hash,
        );

        // Wait for response
        const response = try client.receiveMessage(self.allocator);
        defer self.allocator.free(response);

        // Parse response
        // NOTE(review): this is a plain substring match, so a response such
        // as "unsuccessful" would also count as success; consider parsing
        // the response properly.
        if (std.mem.indexOf(u8, response, "success") != null) {
            return true;
        }

        std.log.warn("Server rejected sync: {s}", .{response});
        return false;
    }

    /// Find the run directory that holds the manifest for `run_id`.
    ///
    /// Looks at each sub-directory of `cfg.artifact_path` for
    /// `<sub-directory>/<run_id>/run_manifest.json`. On the first hit it
    /// returns the path of the `<run_id>` directory (not of the manifest
    /// file). The caller owns the returned slice and frees it with
    /// `self.allocator`. Returns `null` when no sub-directory has the file.
    fn findManifestPath(self: Self, run_id: []const u8, cfg: config.Config) !?[]const u8 {
        // Check in artifact_path/experiment/run_id/
        // `close` needs a mutable Dir, hence `var`.
        var experiments_dir = try std.fs.openDirAbsolute(cfg.artifact_path, .{ .iterate = true });
        defer experiments_dir.close();

        var iter = experiments_dir.iterate();
        while (try iter.next()) |entry| {
            if (entry.kind != .directory) continue;

            // Probe for the manifest first. The directory path is only
            // allocated on a hit, so no error path can leak it.
            const manifest_path = try std.fs.path.join(self.allocator, &[_][]const u8{
                cfg.artifact_path,
                entry.name,
                run_id,
                "run_manifest.json",
            });
            defer self.allocator.free(manifest_path);

            // Any access error is treated as "not in this directory".
            std.fs.accessAbsolute(manifest_path, .{}) catch continue;

            // Return the directory path
            return try std.fs.path.join(self.allocator, &[_][]const u8{
                cfg.artifact_path,
                entry.name,
                run_id,
            });
        }

        return null;
    }

    /// Build the path `$HOME/.ml/sync.db`, using "." when HOME is unset.
    /// The caller owns the returned slice.
    fn getSyncDBPath(self: Self) ![]const u8 {
        const home = std.posix.getenv("HOME") orelse ".";
        return std.fs.path.join(self.allocator, &[_][]const u8{ home, ".ml", "sync.db" });
    }
};
|
|
|
|
/// Outcome of one sync pass.
pub const SyncResult = struct {
    /// Number of runs that were synced.
    synced: usize,
    /// Number of runs that were not synced.
    failed: usize,
    /// Human-readable summary. Released by `deinit`.
    message: []const u8,

    const Self = @This();

    /// Free `message` through `allocator`.
    ///
    /// Only valid when `message` was allocated by that same allocator.
    pub fn deinit(self: Self, allocator: std.mem.Allocator) void {
        const owned_text = self.message;
        allocator.free(owned_text);
    }
};
|
|
|
|
/// Convenience wrapper: build an `AutoSync` on `allocator` and run one sync
/// pass with it, returning that pass's result or error unchanged.
pub fn syncPendingRuns(allocator: std.mem.Allocator) !SyncResult {
    return AutoSync.init(allocator).syncPendingRuns();
}
|