fetch_ml/cli/src/sync_manager.zig
Jeremie Fraeys ad3be36a6d
feat(cli): add workers command, scheduler client, and PII utilities
New commands and modules:
- Add workers.zig command for worker management and status
- Add scheduler_client.zig for scheduler hub communication
- Add pii.zig utility for PII detection and redaction in logs/outputs

Improvements to existing commands:
- groups.zig: enhanced group management with capability metadata
- jupyter/mod.zig: improved Jupyter workspace lifecycle handling
- tasks.zig: better task status reporting and cancellation support

Networking and sync improvements:
- ws/client.zig: WebSocket client enhancements for hub protocol
- sync_manager.zig: improved sync with scheduler state and conflict resolution
- uuid.zig: optimized UUID generation for macOS and Linux

Database utilities:
- sqlite_embedded.zig: embedded SQLite for CLI-local state caching
2026-03-12 12:00:49 -04:00

193 lines
6.2 KiB
Zig

const std = @import("std");
const db = @import("db.zig");
const manifest_lib = @import("manifest.zig");
const config = @import("config.zig");
const ws = @import("net/ws/client.zig");
const crypto = @import("utils/crypto.zig");
const protocol = @import("net/protocol.zig");
const mode = @import("mode.zig");
/// Auto-sync manager for runs that were recorded while offline.
///
/// Reads the pending run IDs from the local sync database and pushes each
/// run's manifest to the server over a WebSocket connection.
pub const AutoSync = struct {
    /// Allocator used for every temporary buffer and for the returned
    /// `SyncResult.message`.
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Number of leading run-ID bytes shown in log lines.
    const short_id_len = 8;

    /// Create a manager bound to `allocator`.
    pub fn init(allocator: std.mem.Allocator) Self {
        return .{
            .allocator = allocator,
        };
    }

    /// Check for and sync any pending runs.
    /// Should be called periodically or when the connection is restored.
    ///
    /// Returns the number of runs synced and failed plus a summary message.
    /// The message is always allocated with `self.allocator`, so the caller
    /// must release it with `SyncResult.deinit` on every path.
    ///
    /// Errors from loading the config, detecting the mode, opening the sync
    /// database, connecting, hashing the API key, or marking a run as synced
    /// are propagated. A failure while syncing an individual run is logged
    /// and counted in `failed` instead.
    pub fn syncPendingRuns(self: Self) !SyncResult {
        const cfg = try config.Config.load(self.allocator);
        defer {
            // `deinit` needs a mutable receiver; `cfg` itself stays const so it
            // can be passed by value below.
            var mut_cfg = cfg;
            mut_cfg.deinit(self.allocator);
        }

        // Check if we're online
        const mode_result = try mode.detect(self.allocator, cfg);
        if (mode.isOffline(mode_result.mode)) {
            return self.emptyResult("Offline - no sync possible");
        }

        // Get pending runs from sync DB
        const db_path = try self.getSyncDBPath();
        defer self.allocator.free(db_path);

        var database = try db.initOrOpenSyncDB(self.allocator, db_path);
        defer database.close();

        const pending = try database.getPendingRuns(self.allocator);
        defer {
            for (pending) |run_id| {
                self.allocator.free(run_id);
            }
            self.allocator.free(pending);
        }

        if (pending.len == 0) {
            return self.emptyResult("No pending runs to sync");
        }
        std.log.info("Found {d} pending run(s) to sync", .{pending.len});

        // Connect to server
        const ws_url = try cfg.getWebSocketUrl(self.allocator);
        defer self.allocator.free(ws_url);

        var client = try ws.Client.connect(self.allocator, ws_url, cfg.api_key);
        defer client.close();

        const api_key_hash = try crypto.hashApiKey(self.allocator, cfg.api_key);
        defer self.allocator.free(api_key_hash);

        var synced: usize = 0;
        var failed: usize = 0;
        for (pending) |run_id| {
            const accepted = self.syncSingleRun(run_id, cfg, &client, api_key_hash) catch |err| {
                std.log.err("Failed to sync run {s}: {}", .{ shortId(run_id), err });
                failed += 1;
                continue;
            };

            if (accepted) {
                // Mark as synced in database. A failure here aborts the whole
                // pass rather than being counted as a per-run failure.
                try database.markAsSynced(run_id);
                synced += 1;
                std.log.info("Synced run {s}", .{shortId(run_id)});
            } else {
                failed += 1;
            }
        }

        const msg = try std.fmt.allocPrint(self.allocator, "Synced {d}/{d} runs", .{ synced, pending.len });
        return .{
            .synced = synced,
            .failed = failed,
            .message = msg,
        };
    }

    /// Build a zero-count result whose message is a heap copy of `text`.
    /// Copying keeps the message safe to pass to `SyncResult.deinit`, which
    /// frees it unconditionally.
    fn emptyResult(self: Self, text: []const u8) !SyncResult {
        return .{
            .synced = 0,
            .failed = 0,
            .message = try self.allocator.dupe(u8, text),
        };
    }

    /// Return at most the first `short_id_len` bytes of `run_id` for logging.
    fn shortId(run_id: []const u8) []const u8 {
        return run_id[0..@min(run_id.len, short_id_len)];
    }

    /// Sync a single run to the server.
    ///
    /// Returns `true` when the server response contains "success", `false`
    /// when no manifest could be located or the response does not contain it.
    /// Errors from locating or reading the manifest, or from the WebSocket
    /// exchange, are propagated.
    fn syncSingleRun(
        self: Self,
        run_id: []const u8,
        cfg: config.Config,
        client: *ws.Client,
        api_key_hash: []const u8,
    ) !bool {
        // Find the run manifest
        const manifest_path = (try self.findManifestPath(run_id, cfg)) orelse {
            std.log.warn("Could not find manifest for run {s}", .{shortId(run_id)});
            return false;
        };
        defer self.allocator.free(manifest_path);

        // Read manifest.
        // NOTE(review): `findManifestPath` returns the run *directory*, not the
        // `run_manifest.json` file; confirm `readManifest` expects a directory.
        var manifest = try manifest_lib.readManifest(manifest_path, self.allocator);
        defer manifest.deinit(self.allocator);

        // Send sync request to server
        try client.sendSyncRunRequest(
            run_id,
            manifest.job_name orelse "unnamed",
            manifest.command,
            manifest.status orelse "UNKNOWN",
            manifest.exit_code,
            api_key_hash,
        );

        // Wait for response
        const response = try client.receiveMessage(self.allocator);
        defer self.allocator.free(response);

        // Parse response.
        // NOTE(review): this is a plain substring match, so any payload that
        // merely mentions "success" is treated as accepted; verify against the
        // server's response format.
        if (std.mem.indexOf(u8, response, "success") != null) {
            return true;
        }
        std.log.warn("Server rejected sync: {s}", .{response});
        return false;
    }

    /// Find the run directory that holds the manifest for `run_id`.
    ///
    /// Looks for `<artifact_path>/<experiment>/<run_id>/run_manifest.json` in
    /// every sub-directory of `cfg.artifact_path`. On a hit, returns the path
    /// of the run directory (not the manifest file); the caller owns the
    /// returned slice. Returns `null` when no experiment directory contains
    /// the manifest.
    fn findManifestPath(self: Self, run_id: []const u8, cfg: config.Config) !?[]const u8 {
        // `Dir.close` takes a mutable pointer, so the handle must be `var`.
        var experiments_dir = try std.fs.openDirAbsolute(cfg.artifact_path, .{ .iterate = true });
        defer experiments_dir.close();

        var iter = experiments_dir.iterate();
        while (try iter.next()) |entry| {
            if (entry.kind != .directory) continue;

            const manifest_path = try std.fs.path.join(self.allocator, &[_][]const u8{
                cfg.artifact_path,
                entry.name,
                run_id,
                "run_manifest.json",
            });
            defer self.allocator.free(manifest_path);

            // Any access failure means "not in this experiment"; keep looking.
            std.fs.accessAbsolute(manifest_path, .{}) catch continue;

            // Allocate the result only once a match is known, so nothing is
            // held across a fallible call.
            return try std.fs.path.join(self.allocator, &[_][]const u8{
                cfg.artifact_path,
                entry.name,
                run_id,
            });
        }
        return null;
    }

    /// Build the path `<HOME>/.ml/sync.db`, using "." when `HOME` is unset.
    /// Caller owns the returned slice.
    fn getSyncDBPath(self: Self) ![]const u8 {
        const home = std.posix.getenv("HOME") orelse ".";
        return std.fs.path.join(self.allocator, &[_][]const u8{ home, ".ml", "sync.db" });
    }
};
/// Outcome of one sync pass.
pub const SyncResult = struct {
    /// Number of runs counted as synced.
    synced: usize,
    /// Number of runs counted as failed.
    failed: usize,
    /// Human-readable summary. `deinit` frees this slice, so it must have been
    /// allocated with the allocator later passed to `deinit`.
    message: []const u8,

    const Self = @This();

    /// Release `message` using `allocator`.
    pub fn deinit(self: Self, allocator: std.mem.Allocator) void {
        const owned_message = self.message;
        allocator.free(owned_message);
    }
};
/// Sync pending runs without constructing an `AutoSync` by hand.
///
/// Builds a temporary `AutoSync` bound to `allocator` and forwards to its
/// `syncPendingRuns` method, returning that call's result or error unchanged.
pub fn syncPendingRuns(allocator: std.mem.Allocator) !SyncResult {
    return AutoSync.init(allocator).syncPendingRuns();
}