From 8b83d604520a184c787141f57f9df3a870a418fd Mon Sep 17 00:00:00 2001
From: Jeremie Fraeys <jfaeys@gmail.com>
Date: Thu, 5 Mar 2026 13:12:52 -0500
Subject: [PATCH] docs: update changelog, readme and CLI reference for 0.1.0

- Add 0.1.0 release entry to CHANGELOG.md with CLI and C++ native libs highlights
- Update README.md with current project status
- Sync CLI reference documentation with recent command changes
---
 CHANGELOG.md              |  18 ++++
 README.md                 |   4 +-
 docs/src/cli-reference.md | 185 +++++++++++++++++++-------------------
 3 files changed, 114 insertions(+), 93 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e84c36..1761991 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,23 @@
 ## [Unreleased]
 
+### Added - CLI Connection Handling & Execution Mode (2026-03-05)
+- CLI: Standardized `ConnectionContext` with `isConnected()` method across all commands
+- CLI: Unified `ml info` command - queries remote server when connected, falls back to local manifests
+- CLI: Added `execution_mode` config setting (`local`/`remote`/`auto`) for persistent execution preference
+- CLI: Added `--mode` flag to `ml init` for setting execution mode during initialization
+- CLI: Added `--rerun` flag to `run` command for re-running previous jobs with full config inheritance (dataset, snapshot, resources, narrative)
+- CLI: Added `--inherit-narrative` and `--inherit-config` flags for provenance tracking
+- WebSocket: Added `query_run_info` opcode (0x28) for remote run info queries
+- Server: Added `handleQueryRunInfo` handler to serve run details via WebSocket
+
+### Changed - CLI Command Behavior (2026-03-05)
+- CLI: Removed `--local` and `--remote` flags from `ml run`, `ml exec`, and `ml info` commands
+- CLI: Execution mode now determined by config setting only (no per-command flags needed)
+- CLI: Removed obsolete `printUsage` from `exec/mod.zig` (now internal module used by `ml run`)
+- CLI: Removed `queue` and `requeue` commands - functionality merged into unified `run` command
+- CLI: Renamed `exec/` directory to `executor/` (execution logic for local/remote)
+- CLI: Renamed `queue/` directory to `submission/` (job submission logic for `run` command)
+
 ### Security - Comprehensive Hardening (2026-02-23)
 
 **Test Coverage Implementation (Phase 8):**
diff --git a/README.md b/README.md
index f3cfa56..0ef9c2f 100644
--- a/README.md
+++ b/README.md
@@ -104,6 +104,7 @@ worker_user = "dev_user"
 worker_base = "/tmp/ml-experiments"
 worker_port = 22
 api_key = "your-api-key"
+execution_mode = "auto"  # local | remote | auto
 EOF
 
 # Core commands
@@ -167,8 +168,9 @@ make cross-platform    # builds for Linux/macOS/Windows
 ## Docs
 
 See `docs/` for detailed guides:
+- `docs/src/cli-reference.md` – CLI reference (execution mode, commands, options)
 - `docs/src/native-libraries.md` – Native C++ libraries (build, test, deploy)
-- `docs/src/zig-cli.md` – CLI reference
+- `docs/src/zig-cli.md` – CLI architecture
 - `docs/src/quick-start.md` – Full setup guide
 - `docs/src/deployment.md` – Production deployment
 - `docs/src/research-features.md` – Research workflow features (narrative capture, outcomes, search)
diff --git a/docs/src/cli-reference.md b/docs/src/cli-reference.md
index f56a066..316ebda 100644
--- a/docs/src/cli-reference.md
+++ b/docs/src/cli-reference.md
@@ -26,23 +26,16 @@ High-performance command-line interface for experiment management, written in Zi
 | Command | Description | Example |
 |---------|-------------|----------|
 | `init` | Interactive configuration setup | `ml init` |
-| `sync` | Sync project to worker with deduplication | `ml sync ./project --name myjob --queue` |
-| `queue` | Queue job for execution | `ml queue myjob --commit abc123 --priority 8` |
+| `sync` | Sync project to worker with deduplication | `ml sync ./project --name myjob` |
+| `run` | Execute job locally or remotely (respects `execution_mode`) | `ml run myjob --commit abc123` |
 | `status` | Get system and worker status | `ml status` |
 | `monitor` | Launch TUI monitoring via SSH | `ml monitor` |
 | `cancel` | Cancel running job | `ml cancel job123` |
 | `prune` | Clean up old experiments | `ml prune --keep 10` |
-| `watch` | Auto-sync directory on changes | `ml watch ./project --queue` |
+| `watch` | Auto-sync directory on changes | `ml watch ./project` |
 | `jupyter` | Manage Jupyter notebook services | `ml jupyter start --name my-nb` |
 | `validate` | Validate provenance/integrity for a commit or task | `ml validate <commit_id> --verbose` |
 | `info` | Show run info from `run_manifest.json` | `ml info <run_dir>` |
-| `requeue` | Re-submit an existing run/commit with new args/resources | `ml requeue <commit_id|run_id|task_id|path> -- --epochs 20` |
-| `logs` | Fetch and follow job logs | `ml logs job123 -n 100` |
-| `compare` | Diff two runs showing narrative/metadata/metrics | `ml compare run1 run2` |
-| `find` | Search experiments by tags/outcome/dataset | `ml find --outcome validated --tag bert` |
-| `export` | Export run bundles with optional anonymization | `ml export run123 --anonymize` |
-| `outcome` | Set post-run outcome tracking | `ml outcome set run123 --outcome validated` |
-| `narrative` | Set run narrative fields | `ml narrative set run123 --hypothesis "LR matters"` |
 | `dataset` | Dataset operations (verify, list, etc.) | `ml dataset verify ./data` |
 
 ### Command Details
@@ -55,14 +48,93 @@ Creates a configuration template at `~/.ml/config.toml` with:
 - Worker connection details
 - API authentication
 - Base paths and ports
+- Execution mode preference
+
+**Execution Mode Options:**
+```bash
+# Set default execution mode during init
+ml init --mode=local    # Always execute locally
+ml init --mode=remote   # Always execute on remote server
+ml init --mode=auto     # Auto-detect based on server connectivity (default)
+```
+
+The `execution_mode` setting controls whether commands like `ml run`, `ml exec`, and `ml info` operate locally or remotely. When set to:
+- `local`: Commands always execute locally using SQLite storage
+- `remote`: Commands always attempt remote server execution
+- `auto`: Commands check server connectivity and fall back to local execution if the server is unavailable
+
+**Modifying Execution Mode:**
+Edit `~/.ml/config.toml` directly:
+```toml
+execution_mode = "local"  # Options: local, remote, auto
+```
+
+---
+
+#### `run` - Execute Job (Local or Remote)
+
+Execute a job either locally or on the remote server based on `execution_mode` configuration.
+
+```bash
+# Run with commit ID (uses execution_mode from config)
+ml run my-job --commit abc123def456
+
+# Run with priority
+ml run my-job --commit abc123 --priority 8
+
+# Run with resource requirements
+ml run my-job --commit abc123 --cpu 4 --memory 16 --gpu 1
+
+# Run with runner args
+ml run my-job --commit abc123 -- --epochs 5 --lr 1e-3
+```
+
+**Execution Behavior:**
+- `execution_mode = "local"`: Runs job locally using SQLite storage
+- `execution_mode = "remote"`: Submits job to remote server via WebSocket
+- `execution_mode = "auto"`: Connects to server if available, otherwise runs locally
+
+**Features:**
+- Respects `execution_mode` from `~/.ml/config.toml`
+- Same interface regardless of local or remote execution
+- WebSocket-based communication for remote execution
+- SQLite-based tracking for local execution
+
+**Re-running Previous Runs:**
+```bash
+# Re-run with ABSOLUTE REPRODUCIBILITY (inherits everything by default)
+ml run my-job --rerun run-id-abc123
+
+# Re-run with custom overrides (inherit all, then modify)
+ml run my-job --rerun run-id-abc123 --cpu 8 --memory 32
+
+# Selective inheritance (only inherit specific aspects)
+ml run my-job --rerun run-id-abc123 --inherit-config        # Only resources/dataset
+ml run my-job --rerun run-id-abc123 --inherit-narrative     # Only hypothesis/context/tags
+
+# Full provenance chain - inherit everything and link as child run
+ml run my-job --rerun run-id-abc123 --parent
+```
+
+**Default Behavior (Research-First Reproducibility):**
+- `--rerun` alone inherits **everything** for absolute reproducibility
+- Explicit `--inherit-*` flags opt in to partial inheritance (only the named aspects are inherited)
+- User-specified flags always override inherited values
+
+**Inherited Fields:**
+- Narrative: hypothesis, context, intent, expected_outcome, tags
+- Config: cpu, memory, gpu, gpu_memory, priority, commit_id, snapshot_id, snapshot_sha256
+- Provenance: parent_run_id links runs for full lineage tracking
+
+---
 
 #### `sync` - Project Synchronization
 ```bash
 # Basic sync
 ml sync ./my-project
 
-# Sync with custom name and queue
-ml sync ./my-project --name "experiment-1" --queue
+# Sync with custom name
+ml sync ./my-project --name "experiment-1"
 
 # Sync with priority
 ml sync ./my-project --priority 9
@@ -72,91 +144,14 @@ ml sync ./my-project --priority 9
 - Content-addressed storage for deduplication
 - SHA256 commit ID generation
 - Rsync-based file transfer
-- Automatic queuing (with `--queue` flag)
-
-#### `queue` - Job Management
-```bash
-# Queue with commit ID
-ml queue my-job --commit abc123def456
-
-# Queue with commit ID prefix (>=7 hex chars; must be unique)
-ml queue my-job --commit abc123 --priority 8
-
-# Queue with extra runner args (stored as task.Args)
-ml queue my-job --commit abc123 -- --epochs 5 --lr 1e-3
-```
-
-**Features:**
-- WebSocket-based communication
-- Priority queuing system
-- API key authentication
-
-**Notes:**
-- `--priority` is passed to the server as a single byte (0-255).
-- Args are sent via a dedicated queue opcode and become `task.Args` on the worker.
-- `--commit` may be a full 40-hex commit id or a unique prefix (>=7 hex chars) resolvable under `worker_base`.
-
-#### `requeue` - Re-submit a Previous Run
-
-Re-submit an existing run with optional config changes, resource overrides, and provenance tracking.
-
-```bash
-# Basic requeue
-ml requeue <commit_id> -- --epochs 20
-
-# Requeue by commit_id prefix (>=7 hex chars; must be unique)
-ml requeue <commit_prefix> -- --epochs 20
-
-# Requeue by run_id/task_id (CLI scans run_manifest.json under worker_base)
-ml requeue <run_id> -- --epochs 20
-
-# Requeue by a run directory or run_manifest.json path
-ml requeue /data/ml-experiments/finished/<run_id> -- --epochs 20
-
-# Override priority/resources on requeue
-ml requeue <task_id> --priority 10 --gpu 1 -- --epochs 20
-
-# Iterative experimentation with config changes
-ml requeue run-id-123 --with-changes --lr=0.002 --batch-size=64
-
-# Inherit narrative from parent run
-ml requeue run-id-123 --inherit-narrative -- --epochs 50
-
-# Inherit config/metadata from parent
-ml requeue run-id-123 --inherit-config --with-changes --lr=0.001
-
-# Link as child run for provenance tracking
-ml requeue run-id-123 --parent -- --epochs 100
-
-# Full provenance chain example
-ml requeue run-id-123 --parent --inherit-narrative --inherit-config \
-  --with-changes --lr=0.0001 -- --epochs 200
-```
-
-**What it does:**
-- Locates `run_manifest.json`
-- Extracts `commit_id`
-- Submits a new queue request using that `commit_id` with optional overridden args/resources
-
-**Flags:**
-- `--with-changes`: Override specific config values (key=value pairs)
-- `--inherit-narrative`: Copy hypothesis/context/intent from parent run
-- `--inherit-config`: Copy metadata/tags from parent run
-- `--parent`: Link as child run for provenance tracking
-
-**Notes:**
-- `--priority` is passed to the server as a single byte (0-255).
-- Args are sent via a dedicated queue opcode and become `task.Args` on the worker.
-- `--commit` may be a full 40-hex commit id or a unique prefix (>=7 hex chars) resolvable under `worker_base`.
-- Tasks support optional `snapshot_id` and `dataset_specs` fields server-side (for provenance and dataset resolution).
 
 #### `watch` - Auto-Sync Monitoring
 ```bash
 # Watch directory for changes
 ml watch ./project
 
-# Watch and auto-queue on changes
-ml watch ./project --name "dev-exp" --queue
+# Watch with custom name
+ml watch ./project --name "dev-exp"
 ```
 
 **Features:**
@@ -379,7 +374,7 @@ ml narrative set run-id-123 --experiment-group lr-sweep-2024
 ml narrative set run-id-123 --tags "bert,training,optimizer"
 ```
 
-**Note:** These fields can also be set at queue time using flags like `--hypothesis`, `--context`, etc.
+**Note:** These fields can also be set at run time using flags like `--hypothesis`, `--context`, etc.
 
 ---
 
@@ -463,8 +458,14 @@ worker_user = "mluser"
 worker_base = "/data/ml-experiments"
 worker_port = 22
 api_key = "your-api-key"
+execution_mode = "auto"  # local | remote | auto
 ```
 
+**Execution Mode:**
+- `local`: Always execute locally using SQLite storage
+- `remote`: Always attempt remote server execution
+- `auto`: Check server connectivity and fall back to local execution if the server is unavailable (default)
+
 ### Performance Features
 
 - **Content-Addressed Storage**: Automatic deduplication of identical files