fetch_ml/cli/src/mode.zig
Jeremie Fraeys 7efe8bbfbf
native: security hardening, research trustworthiness, and CVE mitigations
Security Fixes:
- CVE-2024-45339: Add O_EXCL flag to temp file creation in storage_write_entries()
  Prevents symlink attacks on predictable .tmp file paths
- CVE-2025-47290: Use openat_nofollow() in storage_open()
  Closes TOCTOU race condition via path_sanitizer infrastructure
- CVE-2025-0838: Add MAX_BATCH_SIZE=10000 to add_tasks()
  Prevents integer overflow in batch operations

Research Trustworthiness (dataset_hash):
- Deterministic file ordering: std::sort after collect_files()
- Recursive directory traversal: depth-limited with cycle detection
- Documented exclusions: hidden files and special files noted in API

Bug Fixes:
- R1: storage_init path validation for non-existent directories
- R2: safe_strncpy return value check before strcat
- R3: parallel_hash 256-file cap replaced with std::vector
- R4: wire qi_compact_index/qi_rebuild_index stubs
- R5: CompletionLatch race condition fix (hold mutex during decrement)
- R6: ARMv8 SHA256 transform fix (save abcd_pre before vsha256hq_u32)
- R7: fuzz_index_storage header format fix
- R8: enforce null termination in add_tasks/update_tasks
- R9: use 64 bytes (not 65) in combined hash to exclude null terminator
- R10: status field persistence in save()

New Tests:
- test_recursive_dataset.cpp: Verify deterministic recursive hashing
- test_storage_symlink_resistance.cpp: Verify CVE-2024-45339 fix
- test_queue_index_batch_limit.cpp: Verify CVE-2025-0838 fix
- test_sha256_arm_kat.cpp: ARMv8 known-answer tests
- test_storage_init_new_dir.cpp: R1 verification
- test_parallel_hash_large_dir.cpp: R3 verification
- test_queue_index_compact.cpp: R4 verification

All 8 native tests passing. Library ready for research lab deployment.
2026-02-21 13:33:45 -05:00

108 lines
3.6 KiB
Zig

const std = @import("std");
const Config = @import("config.zig").Config;
const ws = @import("net/ws/client.zig");
/// Operating mode the CLI runs in.
pub const Mode = enum {
    /// Local (offline) operation: runs execute on this machine, with tracking to SQLite.
    offline,
    /// Runner (online) operation: jobs are queued to the remote server.
    online,
};
/// Outcome of mode detection: the selected mode plus an optional message.
pub const DetectionResult = struct {
    /// Mode the CLI should operate in.
    mode: Mode,
    /// Warning text explaining a fallback, or null when there is nothing to report.
    warning: ?[]const u8,
};
/// Decide whether the CLI should run in offline or online mode.
///
/// Checks are applied in order; the first one that matches wins:
///   1. Environment variable FETCHML_LOCAL is exactly "1" → offline (no ping).
///   2. `cfg.force_local` is true → offline (no ping).
///   3. `cfg.worker_host` is empty → offline (no ping).
///   4. Otherwise `pingServer` is called with a 2000 ms budget:
///        - success → online, no warning
///        - timeout / refused / auth error → offline, with a warning describing the fallback
///
/// The `warning` in the result, when present, is a string literal and must not be freed.
/// `allocator` is only forwarded to `pingServer`; any error `pingServer` returns is propagated.
pub fn detect(allocator: std.mem.Allocator, cfg: Config) !DetectionResult {
    const offline_silent: DetectionResult = .{ .mode = .offline, .warning = null };

    // Checks 1-3: any of these selects local operation without contacting the server.
    const env_forces_local = if (std.posix.getenv("FETCHML_LOCAL")) |flag|
        std.mem.eql(u8, flag, "1")
    else
        false;
    if (env_forces_local or cfg.force_local or cfg.worker_host.len == 0) {
        return offline_silent;
    }

    // Check 4: probe the server; every non-success outcome degrades to offline with a warning.
    const outcome = try pingServer(allocator, cfg, 2000);
    const fallback_reason: []const u8 = switch (outcome) {
        .success => return .{ .mode = .online, .warning = null },
        .timeout => "Server unreachable, falling back to local mode",
        .refused => "Server connection refused, falling back to local mode",
        .auth_error => "Authentication failed, falling back to local mode",
    };
    return .{ .mode = .offline, .warning = fallback_reason };
}
/// Possible outcomes of probing the server.
const PingResult = enum {
    /// The server accepted the connection and responded.
    success,
    /// The attempt timed out.
    timeout,
    /// The connection was refused.
    refused,
    /// Authentication with the server failed.
    auth_error,
};
/// Probe the server by opening a WebSocket connection and waiting for one message.
///
/// Simplified implementation: `timeout_ms` is accepted but not enforced.
///
/// Result mapping:
///   - connect fails with ConnectionTimedOut → .timeout
///   - connect fails with AuthenticationFailed → .auth_error
///   - connect fails with ConnectionRefused or any other error → .refused
///   - receive fails with ConnectionTimedOut → .timeout
///   - receive fails with any other error → .refused
///   - a message is received → .success
///
/// Only a failure of `cfg.getWebSocketUrl` is returned as an error; connect and receive
/// failures are always translated into a `PingResult`. The URL and the received message
/// are both released with `allocator` before returning.
fn pingServer(allocator: std.mem.Allocator, cfg: Config, timeout_ms: u64) !PingResult {
    _ = timeout_ms; // Currently unused: this simplified probe does not implement a timeout.

    const url = try cfg.getWebSocketUrl(allocator);
    defer allocator.free(url);

    var conn = ws.Client.connect(allocator, url, cfg.api_key) catch |err| return switch (err) {
        error.ConnectionTimedOut => .timeout,
        error.ConnectionRefused => .refused,
        error.AuthenticationFailed => .auth_error,
        else => .refused,
    };
    defer conn.close();

    // Receiving any message at all is taken as proof that the server is responding.
    const reply = conn.receiveMessage(allocator) catch |err| return switch (err) {
        error.ConnectionTimedOut => .timeout,
        else => .refused,
    };
    defer allocator.free(reply);

    return .success;
}
/// Report whether `mode` is the online mode.
pub fn isOnline(mode: Mode) bool {
    return switch (mode) {
        .online => true,
        .offline => false,
    };
}
/// Report whether `mode` is the offline mode.
pub fn isOffline(mode: Mode) bool {
    return switch (mode) {
        .offline => true,
        .online => false,
    };
}
/// Guard for commands that need a server connection.
///
/// Returns normally when `mode` is online. When `mode` is offline, logs
/// "<command_name> requires server connection" at error level and returns
/// `error.RequiresServer`.
pub fn requireOnline(mode: Mode, command_name: []const u8) !void {
    switch (mode) {
        .online => {},
        .offline => {
            std.log.err("{s} requires server connection", .{command_name});
            return error.RequiresServer;
        },
    }
}