Fix multi-user authentication and clean up debug code

- Fix YAML tags in auth config struct (json -> yaml) - Update CLI configs to use pre-hashed API keys - Remove double hashing in WebSocket client - Fix port mapping (9102 -> 9103) in CLI commands - Update permission keys to use jobs:read, jobs:create, etc. - Clean up all debug logging from CLI and server - All user roles now authenticate correctly: * Admin: Can queue jobs and see all jobs * Researcher: Can queue jobs and see own jobs * Analyst: Can see status (read-only access) Multi-user authentication is now fully functional.
2025-12-06 12:35:32 -05:00 · 2025-12-06 12:35:32 -05:00 · ea15af1833
commit ea15af1833
parent 10a3afaafb
219 changed files with 28033 additions and 3781 deletions
--- a/.github/workflows/benchmark-metrics.yml
+++ b/.github/workflows/benchmark-metrics.yml
@ -0,0 +1,91 @@
 name: Benchmark Metrics
 on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main ]
  schedule:
    - cron: '0 6 * * *'  # Daily at 6 AM UTC
  workflow_dispatch:
 jobs:
  benchmark:
    runs-on: ubuntu-latest
    steps:
    - name: Checkout code
      uses: actions/checkout@v4
    - name: Setup Go
      uses: actions/setup-go@v4
      with:
        go-version: '1.21'
    - name: Cache Go modules
      uses: actions/cache@v3
      with:
        path: ~/go/pkg/mod
        key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
    - name: Run benchmarks
      run: |
        echo "Running performance benchmarks..."
        go test -bench=. -benchmem ./tests/benchmarks/... > benchmark_results.txt 2>&1
        # Extract benchmark results
        grep "Benchmark.*-[0-9].*" benchmark_results.txt > clean_benchmarks.txt || true
    - name: Convert to Prometheus metrics
      run: |
        # Create Prometheus metrics file
        echo "# HELP benchmark_time_per_op Time per operation in nanoseconds" > prometheus_metrics.txt
        echo "# TYPE benchmark_time_per_op gauge" >> prometheus_metrics.txt
        echo "# HELP benchmark_memory_per_op Memory per operation in bytes" >> prometheus_metrics.txt
        echo "# TYPE benchmark_memory_per_op gauge" >> prometheus_metrics.txt
        echo "# HELP benchmark_allocs_per_op Allocations per operation" >> prometheus_metrics.txt
        echo "# TYPE benchmark_allocs_per_op gauge" >> prometheus_metrics.txt
        # Parse benchmark results and convert to Prometheus format
        while IFS= read -r line; do
          if [[ -n "$line" ]]; then
            BENCHMARK_NAME=$(echo "$line" | awk '{print $1}' | sed 's/-[0-9]*$//')
            ITERATIONS=$(echo "$line" | awk '{print $2}')
            TIME_PER_OP=$(echo "$line" | awk '{print $3}')
            MEMORY_PER_OP=$(echo "$line" | awk '{print $4}')
            ALLOCS_PER_OP=$(echo "$line" | awk '{print $5}')
            # Clean benchmark name for Prometheus
            CLEAN_NAME=$(echo "$BENCHMARK_NAME" | sed 's/[^a-zA-Z0-9_]/_/g')
            echo "benchmark_time_per_op{benchmark=\"$CLEAN_NAME\"} ${TIME_PER_OP/ns/}" >> prometheus_metrics.txt
            echo "benchmark_memory_per_op{benchmark=\"$CLEAN_NAME\"} ${MEMORY_PER_OP/B\/op/}" >> prometheus_metrics.txt
            echo "benchmark_allocs_per_op{benchmark=\"$CLEAN_NAME\"} ${ALLOCS_PER_OP/allocs\/op/}" >> prometheus_metrics.txt
          fi
        done < clean_benchmarks.txt
    - name: Push to Prometheus Pushgateway
      run: |
        # Push metrics to Prometheus Pushgateway (if configured)
        if [ -n "${{ secrets.PROMETHEUS_PUSHGATEWAY_URL }}" ]; then
          echo "Pushing metrics to Prometheus..."
          curl --data-binary @prometheus_metrics.txt \
            "${{ secrets.PROMETHEUS_PUSHGATEWAY_URL }}/metrics/job/benchmark/instance/${{ github.run_id }}"
        else
          echo "PROMETHEUS_PUSHGATEWAY_URL not configured, skipping push"
        fi
    - name: Upload benchmark results
      uses: actions/upload-artifact@v3
      with:
        name: benchmark-results-${{ github.run_id }}
        path: |
          benchmark_results.txt
          clean_benchmarks.txt
          prometheus_metrics.txt
        retention-days: 30
    - name: Display results summary
      run: |
        echo "=== Benchmark Results Summary ==="
        cat prometheus_metrics.txt | grep "benchmark_time_per_op" | head -10
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -207,7 +207,7 @@ jobs:
        # Test deployment scripts
        ./scripts/deploy-secure.sh --help || true
-        ./scripts/deploy-production.sh --help || true
+        ./scripts/deploy-prod.sh --help || true
  security-scan:
    name: Security Scan
--- a/.gitignore
+++ b/.gitignore
@ -209,7 +209,7 @@ secrets/
 cli/src/assets/rsync_release.bin
 # Test files
-test_*.go
+# test_*.go
 *_test_output/
 # Build artifacts  
--- a/.golangci.yml
+++ b/.golangci.yml
@ -23,17 +23,10 @@ linters-settings:
    line-length: 100
  revive:
    confidence: 0.8
  depguard:
    rules:
      main:
        allow:
          - $gostd
          - github.com/jfraeys/fetch_ml
 linters:
  disable-all: true
  enable:
    - bodyclose
    - depguard
    - dogsled
    - dupl
    - errcheck
@ -60,19 +53,37 @@ linters:
    - revive
 issues:
  exclude-rules:
-    - path: _test\.go
+    # G306: File permissions - acceptable for test files and scripts
    - text: "G306:"
      linters:
        - gosec
    # Exclude linters for test files
    - path: ".*_test\\.go"
      linters:
        - gocyclo
        - errcheck
        - dupl
        - gosec
        - lll
-    - text: "weak cryptographic primitive"
+        - gosec
        - revive
    # Exclude errcheck for tests directory
    - path: "^tests/"
      linters:
        - errcheck
    # approve insecureSkipVerify in test files
    - path: _test\.go
      text: "insecureSkipVerify"
      linters:
        - gosec
-    - text: "Use of weak random number generator"
+    # Exclude gosec G204 for tests and tools via source match
    - source: "exec\\.CommandContext"
      path: "(tests|tools)/"
      linters:
        - gosec
    # Exclude revive for api package naming via source match
    - source: "^package api$"
      linters:
        - revive
  max-issues-per-linter: 0
  max-same-issues: 0
 severity:
--- a/.golintrc
+++ b/.golintrc
@ -0,0 +1,22 @@
 # Golint configuration file
 # This file configures golint to exclude certain checks that conflict with gosec
 # Exclude golint checks that are handled by gosec or are not relevant
 # Format: <package>:<check_name>
 # Exclude type name stuttering warnings for auth package (handled by gosec)
 internal/auth:stutter
 # Exclude package comment format warnings for certain packages
 internal/config:packageComments
 internal/container:packageComments
 internal/errors:packageComments
 # Exclude blank import warnings for test files
 *_test.go:blankImports
 # Exclude comment format warnings for certain exported variables
 internal/queue:varComment
 # Exclude struct field comment warnings
 internal/*:structComment
--- a/.local-artifacts/run_20251204_230712/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_230712/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   25867	     44784 ns/op	   13520 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	58569440	        19.87 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    104650 ns/op	   26708 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    8703	    553714 ns/op	   18123 B/op	     131 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     918	   1357144 ns/op	    6088 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     908	   1351232 ns/op	    6466 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     909	   1338756 ns/op	    6719 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      42	  26589647 ns/op	  657022 B/op	   12350 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      94	  12482363 ns/op	  794538 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     358	   3631202 ns/op	 1129266 B/op	    1376 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      34	  35603358 ns/op	 1111297 B/op	   12625 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      55	  27881781 ns/op	  615782 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12116	     98516 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23803464	        49.69 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18534	     65030 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	28.082s
--- a/.local-artifacts/run_20251204_230712/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_230712/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   25867	     44784 ns/op	   13520 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	58569440	        19.87 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    104650 ns/op	   26708 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    8703	    553714 ns/op	   18123 B/op	     131 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     918	   1357144 ns/op	    6088 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     908	   1351232 ns/op	    6466 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     909	   1338756 ns/op	    6719 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      42	  26589647 ns/op	  657022 B/op	   12350 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      94	  12482363 ns/op	  794538 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     358	   3631202 ns/op	 1129266 B/op	    1376 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      34	  35603358 ns/op	 1111297 B/op	   12625 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      55	  27881781 ns/op	  615782 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12116	     98516 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23803464	        49.69 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18534	     65030 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_230712/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_230712/prometheus_metrics.txt
@ -0,0 +1,51 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 44784
 benchmark_memory_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13520
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.87
 benchmark_memory_per_op{benchmark="BenchmarkMetricsCollection"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 104650
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26708
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 553714
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 18123
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1357144
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6088
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1351232
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6466
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1338756
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6719
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26589647
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 657022
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12482363
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794538
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3631202
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1129266
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 35603358
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1111297
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 27881781
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615782
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 98516
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2933
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 49.69
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 65030
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_230712/report.html
+++ b/.local-artifacts/run_20251204_230712/report.html
@ -0,0 +1,141 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Benchmark Report - 20251204_230712</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
        .metric { font-family: monospace; }
    </style>
 </head>
 <body>
    <h1>Benchmark Report</h1>
    <p><strong>Run ID:</strong> 20251204_230712</p>
    <p><strong>Date:</strong> Thu Dec  4 18:07:41 EST 2025</p>
    <h2>Results</h2>
    <table>
        <tr>
            <th>Benchmark</th>
            <th>Time (ns/op)</th>
            <th>Memory (B/op)</th>
            <th>Allocs (allocs/op)</th>
        </tr>
        <tr>
            <td class="metric">BenchmarkAPIServerCreateJobSimple-24</td>
            <td>44784</td>
            <td>ns/op</td>
            <td>13520</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMetricsCollection-24</td>
            <td>19.87</td>
            <td>ns/op</td>
            <td>0</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-1-24</td>
            <td>104650</td>
            <td>ns/op</td>
            <td>26708</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-5-24</td>
            <td>553714</td>
            <td>ns/op</td>
            <td>18123</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-10-24</td>
            <td>1357144</td>
            <td>ns/op</td>
            <td>6088</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-25-24</td>
            <td>1351232</td>
            <td>ns/op</td>
            <td>6466</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-50-24</td>
            <td>1338756</td>
            <td>ns/op</td>
            <td>6719</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/SmallExperiment-24</td>
            <td>26589647</td>
            <td>ns/op</td>
            <td>657022</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/MediumExperiment-24</td>
            <td>12482363</td>
            <td>ns/op</td>
            <td>794538</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/LargeExperiment-24</td>
            <td>3631202</td>
            <td>ns/op</td>
            <td>1129266</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ConcurrentExperiments-24</td>
            <td>35603358</td>
            <td>ns/op</td>
            <td>1111297</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ExperimentMetrics-24</td>
            <td>27881781</td>
            <td>ns/op</td>
            <td>615782</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetCreation-24</td>
            <td>98516</td>
            <td>ns/op</td>
            <td>2933</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetRetrieval-24</td>
            <td>49.69</td>
            <td>ns/op</td>
            <td>16</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetUpdate-24</td>
            <td>65030</td>
            <td>ns/op</td>
            <td>1285</td>
        </tr>
    </table>
    <h2>Raw Output</h2>
    <pre>goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   25867	     44784 ns/op	   13520 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	58569440	        19.87 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    104650 ns/op	   26708 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    8703	    553714 ns/op	   18123 B/op	     131 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     918	   1357144 ns/op	    6088 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     908	   1351232 ns/op	    6466 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     909	   1338756 ns/op	    6719 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      42	  26589647 ns/op	  657022 B/op	   12350 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      94	  12482363 ns/op	  794538 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     358	   3631202 ns/op	 1129266 B/op	    1376 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      34	  35603358 ns/op	 1111297 B/op	   12625 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      55	  27881781 ns/op	  615782 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12116	     98516 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23803464	        49.69 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18534	     65030 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	28.082s</pre>
 </body>
 </html>
--- a/.local-artifacts/run_20251204_231218/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_231218/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   28656	     43062 ns/op	   13518 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59213934	        19.29 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    108510 ns/op	   26825 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9895	    657334 ns/op	   16807 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     914	   1346314 ns/op	    6032 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     885	   1350853 ns/op	    6289 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     853	   1346826 ns/op	    6431 B/op	      75 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      43	  26357159 ns/op	  657854 B/op	   12354 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      92	  12494936 ns/op	  794812 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     348	   3659886 ns/op	 1129733 B/op	    1376 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      40	  32637755 ns/op	 1114183 B/op	   12636 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      56	  27153394 ns/op	  615897 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   10000	    102638 ns/op	    2921 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	20641564	        50.73 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   20919	     65724 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	26.755s
--- a/.local-artifacts/run_20251204_231218/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_231218/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   28656	     43062 ns/op	   13518 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59213934	        19.29 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    108510 ns/op	   26825 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9895	    657334 ns/op	   16807 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     914	   1346314 ns/op	    6032 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     885	   1350853 ns/op	    6289 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     853	   1346826 ns/op	    6431 B/op	      75 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      43	  26357159 ns/op	  657854 B/op	   12354 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      92	  12494936 ns/op	  794812 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     348	   3659886 ns/op	 1129733 B/op	    1376 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      40	  32637755 ns/op	 1114183 B/op	   12636 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      56	  27153394 ns/op	  615897 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   10000	    102638 ns/op	    2921 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	20641564	        50.73 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   20919	     65724 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_231218/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_231218/prometheus_metrics.txt
@ -0,0 +1,51 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 43062
 benchmark_memory_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13518
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.29
 benchmark_memory_per_op{benchmark="BenchmarkMetricsCollection"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 108510
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26825
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 657334
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 16807
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1346314
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6032
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1350853
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6289
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1346826
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6431
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26357159
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 657854
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12494936
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794812
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3659886
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1129733
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 32637755
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1114183
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 27153394
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615897
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 102638
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2921
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 50.73
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 65724
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_231218/report.html
+++ b/.local-artifacts/run_20251204_231218/report.html
@ -0,0 +1,141 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Benchmark Report - 20251204_231218</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
        .metric { font-family: monospace; }
    </style>
 </head>
 <body>
    <h1>Benchmark Report</h1>
    <p><strong>Run ID:</strong> 20251204_231218</p>
    <p><strong>Date:</strong> Thu Dec  4 18:12:46 EST 2025</p>
    <h2>Results</h2>
    <table>
        <tr>
            <th>Benchmark</th>
            <th>Time (ns/op)</th>
            <th>Memory (B/op)</th>
            <th>Allocs (allocs/op)</th>
        </tr>
        <tr>
            <td class="metric">BenchmarkAPIServerCreateJobSimple-24</td>
            <td>43062</td>
            <td>ns/op</td>
            <td>13518</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMetricsCollection-24</td>
            <td>19.29</td>
            <td>ns/op</td>
            <td>0</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-1-24</td>
            <td>108510</td>
            <td>ns/op</td>
            <td>26825</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-5-24</td>
            <td>657334</td>
            <td>ns/op</td>
            <td>16807</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-10-24</td>
            <td>1346314</td>
            <td>ns/op</td>
            <td>6032</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-25-24</td>
            <td>1350853</td>
            <td>ns/op</td>
            <td>6289</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-50-24</td>
            <td>1346826</td>
            <td>ns/op</td>
            <td>6431</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/SmallExperiment-24</td>
            <td>26357159</td>
            <td>ns/op</td>
            <td>657854</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/MediumExperiment-24</td>
            <td>12494936</td>
            <td>ns/op</td>
            <td>794812</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/LargeExperiment-24</td>
            <td>3659886</td>
            <td>ns/op</td>
            <td>1129733</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ConcurrentExperiments-24</td>
            <td>32637755</td>
            <td>ns/op</td>
            <td>1114183</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ExperimentMetrics-24</td>
            <td>27153394</td>
            <td>ns/op</td>
            <td>615897</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetCreation-24</td>
            <td>102638</td>
            <td>ns/op</td>
            <td>2921</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetRetrieval-24</td>
            <td>50.73</td>
            <td>ns/op</td>
            <td>16</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetUpdate-24</td>
            <td>65724</td>
            <td>ns/op</td>
            <td>1285</td>
        </tr>
    </table>
    <h2>Raw Output</h2>
    <pre>goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   28656	     43062 ns/op	   13518 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59213934	        19.29 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    108510 ns/op	   26825 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9895	    657334 ns/op	   16807 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     914	   1346314 ns/op	    6032 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     885	   1350853 ns/op	    6289 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     853	   1346826 ns/op	    6431 B/op	      75 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      43	  26357159 ns/op	  657854 B/op	   12354 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      92	  12494936 ns/op	  794812 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     348	   3659886 ns/op	 1129733 B/op	    1376 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      40	  32637755 ns/op	 1114183 B/op	   12636 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      56	  27153394 ns/op	  615897 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   10000	    102638 ns/op	    2921 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	20641564	        50.73 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   20919	     65724 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	26.755s</pre>
 </body>
 </html>
--- a/.local-artifacts/run_20251204_231255/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_231255/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   28408	     45304 ns/op	   13517 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	60437035	        19.88 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    106089 ns/op	   26846 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9220	    579691 ns/op	   17615 B/op	     128 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     925	   1348616 ns/op	    6050 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     927	   1340898 ns/op	    6529 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     916	   1333626 ns/op	    6694 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      43	  26542657 ns/op	  656983 B/op	   12350 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	     100	  12121203 ns/op	  794420 B/op	    6253 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     358	   3704013 ns/op	 1128981 B/op	    1374 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      34	  32337061 ns/op	 1113039 B/op	   12630 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26482224 ns/op	  615734 B/op	   17883 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12356	    101514 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24143787	        49.80 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18423	     65515 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	28.641s
--- a/.local-artifacts/run_20251204_231255/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_231255/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   28408	     45304 ns/op	   13517 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	60437035	        19.88 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    106089 ns/op	   26846 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9220	    579691 ns/op	   17615 B/op	     128 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     925	   1348616 ns/op	    6050 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     927	   1340898 ns/op	    6529 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     916	   1333626 ns/op	    6694 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      43	  26542657 ns/op	  656983 B/op	   12350 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	     100	  12121203 ns/op	  794420 B/op	    6253 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     358	   3704013 ns/op	 1128981 B/op	    1374 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      34	  32337061 ns/op	 1113039 B/op	   12630 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26482224 ns/op	  615734 B/op	   17883 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12356	    101514 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24143787	        49.80 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18423	     65515 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_231255/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_231255/prometheus_metrics.txt
@ -0,0 +1,51 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 45304
 benchmark_memory_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13517
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.88
 benchmark_memory_per_op{benchmark="BenchmarkMetricsCollection"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 106089
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26846
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 579691
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 17615
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1348616
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6050
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1340898
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6529
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1333626
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6694
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26542657
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 656983
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12121203
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794420
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3704013
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1128981
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 32337061
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1113039
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 26482224
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615734
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 101514
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2934
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 49.80
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 65515
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_231255/report.html
+++ b/.local-artifacts/run_20251204_231255/report.html
@ -0,0 +1,141 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Benchmark Report - 20251204_231255</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
        .metric { font-family: monospace; }
    </style>
 </head>
 <body>
    <h1>Benchmark Report</h1>
    <p><strong>Run ID:</strong> 20251204_231255</p>
    <p><strong>Date:</strong> Thu Dec  4 18:13:24 EST 2025</p>
    <h2>Results</h2>
    <table>
        <tr>
            <th>Benchmark</th>
            <th>Time (ns/op)</th>
            <th>Memory (B/op)</th>
            <th>Allocs (allocs/op)</th>
        </tr>
        <tr>
            <td class="metric">BenchmarkAPIServerCreateJobSimple-24</td>
            <td>45304</td>
            <td>ns/op</td>
            <td>13517</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMetricsCollection-24</td>
            <td>19.88</td>
            <td>ns/op</td>
            <td>0</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-1-24</td>
            <td>106089</td>
            <td>ns/op</td>
            <td>26846</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-5-24</td>
            <td>579691</td>
            <td>ns/op</td>
            <td>17615</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-10-24</td>
            <td>1348616</td>
            <td>ns/op</td>
            <td>6050</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-25-24</td>
            <td>1340898</td>
            <td>ns/op</td>
            <td>6529</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-50-24</td>
            <td>1333626</td>
            <td>ns/op</td>
            <td>6694</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/SmallExperiment-24</td>
            <td>26542657</td>
            <td>ns/op</td>
            <td>656983</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/MediumExperiment-24</td>
            <td>12121203</td>
            <td>ns/op</td>
            <td>794420</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/LargeExperiment-24</td>
            <td>3704013</td>
            <td>ns/op</td>
            <td>1128981</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ConcurrentExperiments-24</td>
            <td>32337061</td>
            <td>ns/op</td>
            <td>1113039</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ExperimentMetrics-24</td>
            <td>26482224</td>
            <td>ns/op</td>
            <td>615734</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetCreation-24</td>
            <td>101514</td>
            <td>ns/op</td>
            <td>2934</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetRetrieval-24</td>
            <td>49.80</td>
            <td>ns/op</td>
            <td>16</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetUpdate-24</td>
            <td>65515</td>
            <td>ns/op</td>
            <td>1285</td>
        </tr>
    </table>
    <h2>Raw Output</h2>
    <pre>goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   28408	     45304 ns/op	   13517 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	60437035	        19.88 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    106089 ns/op	   26846 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9220	    579691 ns/op	   17615 B/op	     128 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     925	   1348616 ns/op	    6050 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     927	   1340898 ns/op	    6529 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     916	   1333626 ns/op	    6694 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      43	  26542657 ns/op	  656983 B/op	   12350 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	     100	  12121203 ns/op	  794420 B/op	    6253 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     358	   3704013 ns/op	 1128981 B/op	    1374 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      34	  32337061 ns/op	 1113039 B/op	   12630 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26482224 ns/op	  615734 B/op	   17883 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12356	    101514 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24143787	        49.80 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18423	     65515 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	28.641s</pre>
 </body>
 </html>
--- a/.local-artifacts/run_20251204_231459/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_231459/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   28129	     45677 ns/op	   13532 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59903404	        19.48 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    105817 ns/op	   26610 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    8654	    545199 ns/op	   18285 B/op	     132 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     904	   1350040 ns/op	    6043 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     924	   1332526 ns/op	    6228 B/op	      72 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     952	   1339113 ns/op	    6724 B/op	      75 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      45	  26355390 ns/op	  657327 B/op	   12351 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      92	  12034762 ns/op	  794688 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     351	   3763459 ns/op	 1129490 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      37	  30668937 ns/op	 1112708 B/op	   12626 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      56	  26930825 ns/op	  615839 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12364	    102242 ns/op	    2935 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23809105	        50.50 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18494	     65564 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	27.143s
--- a/.local-artifacts/run_20251204_231459/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_231459/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   28129	     45677 ns/op	   13532 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59903404	        19.48 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    105817 ns/op	   26610 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    8654	    545199 ns/op	   18285 B/op	     132 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     904	   1350040 ns/op	    6043 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     924	   1332526 ns/op	    6228 B/op	      72 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     952	   1339113 ns/op	    6724 B/op	      75 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      45	  26355390 ns/op	  657327 B/op	   12351 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      92	  12034762 ns/op	  794688 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     351	   3763459 ns/op	 1129490 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      37	  30668937 ns/op	 1112708 B/op	   12626 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      56	  26930825 ns/op	  615839 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12364	    102242 ns/op	    2935 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23809105	        50.50 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18494	     65564 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_231459/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_231459/prometheus_metrics.txt
@ -0,0 +1,51 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 45677
 benchmark_memory_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13532
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.48
 benchmark_memory_per_op{benchmark="BenchmarkMetricsCollection"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 105817
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26610
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 545199
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 18285
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1350040
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6043
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1332526
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6228
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1339113
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6724
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26355390
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 657327
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12034762
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794688
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3763459
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1129490
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 30668937
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1112708
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 26930825
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615839
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 102242
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2935
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 50.50
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 65564
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_231459/report.html
+++ b/.local-artifacts/run_20251204_231459/report.html
@ -0,0 +1,141 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Benchmark Report - 20251204_231459</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
        .metric { font-family: monospace; }
    </style>
 </head>
 <body>
    <h1>Benchmark Report</h1>
    <p><strong>Run ID:</strong> 20251204_231459</p>
    <p><strong>Date:</strong> Thu Dec  4 18:15:28 EST 2025</p>
    <h2>Results</h2>
    <table>
        <tr>
            <th>Benchmark</th>
            <th>Time (ns/op)</th>
            <th>Memory (B/op)</th>
            <th>Allocs (allocs/op)</th>
        </tr>
        <tr>
            <td class="metric">BenchmarkAPIServerCreateJobSimple-24</td>
            <td>45677</td>
            <td>ns/op</td>
            <td>13532</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMetricsCollection-24</td>
            <td>19.48</td>
            <td>ns/op</td>
            <td>0</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-1-24</td>
            <td>105817</td>
            <td>ns/op</td>
            <td>26610</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-5-24</td>
            <td>545199</td>
            <td>ns/op</td>
            <td>18285</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-10-24</td>
            <td>1350040</td>
            <td>ns/op</td>
            <td>6043</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-25-24</td>
            <td>1332526</td>
            <td>ns/op</td>
            <td>6228</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-50-24</td>
            <td>1339113</td>
            <td>ns/op</td>
            <td>6724</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/SmallExperiment-24</td>
            <td>26355390</td>
            <td>ns/op</td>
            <td>657327</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/MediumExperiment-24</td>
            <td>12034762</td>
            <td>ns/op</td>
            <td>794688</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/LargeExperiment-24</td>
            <td>3763459</td>
            <td>ns/op</td>
            <td>1129490</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ConcurrentExperiments-24</td>
            <td>30668937</td>
            <td>ns/op</td>
            <td>1112708</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ExperimentMetrics-24</td>
            <td>26930825</td>
            <td>ns/op</td>
            <td>615839</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetCreation-24</td>
            <td>102242</td>
            <td>ns/op</td>
            <td>2935</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetRetrieval-24</td>
            <td>50.50</td>
            <td>ns/op</td>
            <td>16</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetUpdate-24</td>
            <td>65564</td>
            <td>ns/op</td>
            <td>1285</td>
        </tr>
    </table>
    <h2>Raw Output</h2>
    <pre>goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   28129	     45677 ns/op	   13532 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59903404	        19.48 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    105817 ns/op	   26610 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    8654	    545199 ns/op	   18285 B/op	     132 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     904	   1350040 ns/op	    6043 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     924	   1332526 ns/op	    6228 B/op	      72 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     952	   1339113 ns/op	    6724 B/op	      75 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      45	  26355390 ns/op	  657327 B/op	   12351 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      92	  12034762 ns/op	  794688 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     351	   3763459 ns/op	 1129490 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      37	  30668937 ns/op	 1112708 B/op	   12626 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      56	  26930825 ns/op	  615839 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12364	    102242 ns/op	    2935 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23809105	        50.50 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18494	     65564 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	27.143s</pre>
 </body>
 </html>
--- a/.local-artifacts/run_20251204_231634/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_231634/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   26200	     44608 ns/op	   13514 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	58956229	        19.88 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    108040 ns/op	   26965 B/op	     163 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	   10000	    657977 ns/op	   16658 B/op	     123 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     843	   1342869 ns/op	    6078 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     902	   1356405 ns/op	    6555 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     907	   1341416 ns/op	    6429 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      39	  26197300 ns/op	  657330 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	     100	  12172133 ns/op	  794610 B/op	    6253 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     348	   3686597 ns/op	 1129573 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      33	  33467878 ns/op	 1111544 B/op	   12626 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      57	  27330560 ns/op	  615815 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12249	     97669 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24545986	        50.01 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18687	     65891 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	28.732s
--- a/.local-artifacts/run_20251204_231634/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_231634/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   26200	     44608 ns/op	   13514 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	58956229	        19.88 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    108040 ns/op	   26965 B/op	     163 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	   10000	    657977 ns/op	   16658 B/op	     123 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     843	   1342869 ns/op	    6078 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     902	   1356405 ns/op	    6555 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     907	   1341416 ns/op	    6429 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      39	  26197300 ns/op	  657330 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	     100	  12172133 ns/op	  794610 B/op	    6253 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     348	   3686597 ns/op	 1129573 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      33	  33467878 ns/op	 1111544 B/op	   12626 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      57	  27330560 ns/op	  615815 B/op	   17885 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12249	     97669 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24545986	        50.01 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18687	     65891 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_231634/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_231634/prometheus_metrics.txt
@ -0,0 +1,51 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 44608
 benchmark_memory_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13514
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.88
 benchmark_memory_per_op{benchmark="BenchmarkMetricsCollection"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 108040
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26965
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 657977
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 16658
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1342869
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6078
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1356405
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6555
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1341416
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6429
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26197300
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 657330
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12172133
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794610
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3686597
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1129573
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 33467878
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1111544
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 27330560
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615815
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 97669
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2934
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 50.01
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 65891
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_231712/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_231712/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   27906	     45877 ns/op	   13520 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59125434	        19.91 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    106563 ns/op	   26651 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9916	    580762 ns/op	   16774 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     902	   1361628 ns/op	    6050 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     913	   1341660 ns/op	    6645 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     902	   1339436 ns/op	    6130 B/op	      73 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      42	  26144707 ns/op	  657412 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      99	  12045172 ns/op	  794945 B/op	    6255 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     350	   3655986 ns/op	 1129633 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      30	  37392029 ns/op	 1111096 B/op	   12623 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26943573 ns/op	  615802 B/op	   17883 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12285	    101658 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24175867	        50.53 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18481	     65640 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	29.026s
--- a/.local-artifacts/run_20251204_231712/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_231712/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   27906	     45877 ns/op	   13520 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59125434	        19.91 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    106563 ns/op	   26651 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9916	    580762 ns/op	   16774 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     902	   1361628 ns/op	    6050 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     913	   1341660 ns/op	    6645 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     902	   1339436 ns/op	    6130 B/op	      73 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      42	  26144707 ns/op	  657412 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      99	  12045172 ns/op	  794945 B/op	    6255 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     350	   3655986 ns/op	 1129633 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      30	  37392029 ns/op	 1111096 B/op	   12623 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26943573 ns/op	  615802 B/op	   17883 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12285	    101658 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24175867	        50.53 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18481	     65640 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_231712/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_231712/prometheus_metrics.txt
@ -0,0 +1,51 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 45877
 benchmark_memory_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13520
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.91
 benchmark_memory_per_op{benchmark="BenchmarkMetricsCollection"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 106563
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26651
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 580762
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 16774
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1361628
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6050
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1341660
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6645
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1339436
 benchmark_memory_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6130
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26144707
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 657412
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12045172
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794945
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3655986
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1129633
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 37392029
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1111096
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 26943573
 benchmark_memory_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615802
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 101658
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2934
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 50.53
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 65640
 benchmark_memory_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} ns/op
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_231712/report.html
+++ b/.local-artifacts/run_20251204_231712/report.html
@ -0,0 +1,141 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Benchmark Report - 20251204_231712</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
        .metric { font-family: monospace; }
    </style>
 </head>
 <body>
    <h1>Benchmark Report</h1>
    <p><strong>Run ID:</strong> 20251204_231712</p>
    <p><strong>Date:</strong> Thu Dec  4 18:17:42 EST 2025</p>
    <h2>Results</h2>
    <table>
        <tr>
            <th>Benchmark</th>
            <th>Time (ns/op)</th>
            <th>Memory (B/op)</th>
            <th>Allocs (allocs/op)</th>
        </tr>
        <tr>
            <td class="metric">BenchmarkAPIServerCreateJobSimple-24</td>
            <td>45877</td>
            <td>ns/op</td>
            <td>13520</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMetricsCollection-24</td>
            <td>19.91</td>
            <td>ns/op</td>
            <td>0</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-1-24</td>
            <td>106563</td>
            <td>ns/op</td>
            <td>26651</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-5-24</td>
            <td>580762</td>
            <td>ns/op</td>
            <td>16774</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-10-24</td>
            <td>1361628</td>
            <td>ns/op</td>
            <td>6050</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-25-24</td>
            <td>1341660</td>
            <td>ns/op</td>
            <td>6645</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-50-24</td>
            <td>1339436</td>
            <td>ns/op</td>
            <td>6130</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/SmallExperiment-24</td>
            <td>26144707</td>
            <td>ns/op</td>
            <td>657412</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/MediumExperiment-24</td>
            <td>12045172</td>
            <td>ns/op</td>
            <td>794945</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/LargeExperiment-24</td>
            <td>3655986</td>
            <td>ns/op</td>
            <td>1129633</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ConcurrentExperiments-24</td>
            <td>37392029</td>
            <td>ns/op</td>
            <td>1111096</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ExperimentMetrics-24</td>
            <td>26943573</td>
            <td>ns/op</td>
            <td>615802</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetCreation-24</td>
            <td>101658</td>
            <td>ns/op</td>
            <td>2934</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetRetrieval-24</td>
            <td>50.53</td>
            <td>ns/op</td>
            <td>16</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetUpdate-24</td>
            <td>65640</td>
            <td>ns/op</td>
            <td>1285</td>
        </tr>
    </table>
    <h2>Raw Output</h2>
    <pre>goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   27906	     45877 ns/op	   13520 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	59125434	        19.91 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    106563 ns/op	   26651 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9916	    580762 ns/op	   16774 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     902	   1361628 ns/op	    6050 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     913	   1341660 ns/op	    6645 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     902	   1339436 ns/op	    6130 B/op	      73 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      42	  26144707 ns/op	  657412 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      99	  12045172 ns/op	  794945 B/op	    6255 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     350	   3655986 ns/op	 1129633 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      30	  37392029 ns/op	 1111096 B/op	   12623 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26943573 ns/op	  615802 B/op	   17883 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12285	    101658 ns/op	    2934 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	24175867	        50.53 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18481	     65640 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	29.026s</pre>
 </body>
 </html>
--- a/.local-artifacts/run_20251204_231833/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_231833/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   27950	     44615 ns/op	   13510 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	61569640	        19.81 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    104975 ns/op	   26775 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9744	    616978 ns/op	   16959 B/op	     125 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     921	   1342897 ns/op	    6123 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     916	   1355236 ns/op	    6286 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     930	   1326230 ns/op	    6997 B/op	      76 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      44	  26734717 ns/op	  657047 B/op	   12351 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      93	  12165317 ns/op	  794462 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     364	   3637957 ns/op	 1128897 B/op	    1374 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      33	  31061085 ns/op	 1114816 B/op	   12631 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      54	  26862161 ns/op	  615718 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12193	    102081 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	19180039	        52.64 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18472	     65401 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	27.261s
--- a/.local-artifacts/run_20251204_231833/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_231833/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   27950	     44615 ns/op	   13510 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	61569640	        19.81 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    104975 ns/op	   26775 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9744	    616978 ns/op	   16959 B/op	     125 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     921	   1342897 ns/op	    6123 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     916	   1355236 ns/op	    6286 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     930	   1326230 ns/op	    6997 B/op	      76 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      44	  26734717 ns/op	  657047 B/op	   12351 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      93	  12165317 ns/op	  794462 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     364	   3637957 ns/op	 1128897 B/op	    1374 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      33	  31061085 ns/op	 1114816 B/op	   12631 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      54	  26862161 ns/op	  615718 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12193	    102081 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	19180039	        52.64 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18472	     65401 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_231833/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_231833/prometheus_metrics.txt
@ -0,0 +1,36 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 44615
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13510
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.81
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 104975
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26775
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 616978
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 16959
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1342897
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6123
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1355236
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6286
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1326230
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6997
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26734717
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 657047
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12165317
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794462
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3637957
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1128897
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 31061085
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1114816
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 26862161
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615718
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 102081
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2933
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 52.64
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 65401
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_231833/report.html
+++ b/.local-artifacts/run_20251204_231833/report.html
@ -0,0 +1,141 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Benchmark Report - 20251204_231833</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
        .metric { font-family: monospace; }
    </style>
 </head>
 <body>
    <h1>Benchmark Report</h1>
    <p><strong>Run ID:</strong> 20251204_231833</p>
    <p><strong>Date:</strong> Thu Dec  4 18:19:01 EST 2025</p>
    <h2>Results</h2>
    <table>
        <tr>
            <th>Benchmark</th>
            <th>Time (ns/op)</th>
            <th>Memory (B/op)</th>
            <th>Allocs (allocs/op)</th>
        </tr>
        <tr>
            <td class="metric">BenchmarkAPIServerCreateJobSimple-24</td>
            <td>44615</td>
            <td>ns/op</td>
            <td>13510</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMetricsCollection-24</td>
            <td>19.81</td>
            <td>ns/op</td>
            <td>0</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-1-24</td>
            <td>104975</td>
            <td>ns/op</td>
            <td>26775</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-5-24</td>
            <td>616978</td>
            <td>ns/op</td>
            <td>16959</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-10-24</td>
            <td>1342897</td>
            <td>ns/op</td>
            <td>6123</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-25-24</td>
            <td>1355236</td>
            <td>ns/op</td>
            <td>6286</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-50-24</td>
            <td>1326230</td>
            <td>ns/op</td>
            <td>6997</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/SmallExperiment-24</td>
            <td>26734717</td>
            <td>ns/op</td>
            <td>657047</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/MediumExperiment-24</td>
            <td>12165317</td>
            <td>ns/op</td>
            <td>794462</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/LargeExperiment-24</td>
            <td>3637957</td>
            <td>ns/op</td>
            <td>1128897</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ConcurrentExperiments-24</td>
            <td>31061085</td>
            <td>ns/op</td>
            <td>1114816</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ExperimentMetrics-24</td>
            <td>26862161</td>
            <td>ns/op</td>
            <td>615718</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetCreation-24</td>
            <td>102081</td>
            <td>ns/op</td>
            <td>2933</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetRetrieval-24</td>
            <td>52.64</td>
            <td>ns/op</td>
            <td>16</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetUpdate-24</td>
            <td>65401</td>
            <td>ns/op</td>
            <td>1285</td>
        </tr>
    </table>
    <h2>Raw Output</h2>
    <pre>goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   27950	     44615 ns/op	   13510 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	61569640	        19.81 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    104975 ns/op	   26775 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	    9744	    616978 ns/op	   16959 B/op	     125 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     921	   1342897 ns/op	    6123 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     916	   1355236 ns/op	    6286 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     930	   1326230 ns/op	    6997 B/op	      76 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      44	  26734717 ns/op	  657047 B/op	   12351 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      93	  12165317 ns/op	  794462 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     364	   3637957 ns/op	 1128897 B/op	    1374 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      33	  31061085 ns/op	 1114816 B/op	   12631 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      54	  26862161 ns/op	  615718 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12193	    102081 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	19180039	        52.64 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18472	     65401 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	27.261s</pre>
 </body>
 </html>
--- a/.local-artifacts/run_20251204_232656/benchmark_results.txt
+++ b/.local-artifacts/run_20251204_232656/benchmark_results.txt
@ -0,0 +1,21 @@
 goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   27296	     43913 ns/op	   13526 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	61271120	        19.34 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    105096 ns/op	   26660 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	   10000	    646391 ns/op	   16738 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     872	   1369525 ns/op	    6036 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     904	   1394439 ns/op	    6546 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     889	   1373567 ns/op	    6347 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      40	  26726470 ns/op	  657367 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      97	  12430890 ns/op	  794823 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     346	   3863256 ns/op	 1129599 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      36	  32534372 ns/op	 1115220 B/op	   12637 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26962389 ns/op	  615818 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12075	    100813 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23898721	        50.28 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18692	     63463 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	28.692s
--- a/.local-artifacts/run_20251204_232656/clean_benchmarks.txt
+++ b/.local-artifacts/run_20251204_232656/clean_benchmarks.txt
@ -0,0 +1,15 @@
 BenchmarkAPIServerCreateJobSimple-24    	   27296	     43913 ns/op	   13526 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	61271120	        19.34 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    105096 ns/op	   26660 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	   10000	    646391 ns/op	   16738 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     872	   1369525 ns/op	    6036 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     904	   1394439 ns/op	    6546 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     889	   1373567 ns/op	    6347 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      40	  26726470 ns/op	  657367 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      97	  12430890 ns/op	  794823 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     346	   3863256 ns/op	 1129599 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      36	  32534372 ns/op	 1115220 B/op	   12637 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26962389 ns/op	  615818 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12075	    100813 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23898721	        50.28 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18692	     63463 ns/op	    1285 B/op	      36 allocs/op
--- a/.local-artifacts/run_20251204_232656/prometheus_metrics.txt
+++ b/.local-artifacts/run_20251204_232656/prometheus_metrics.txt
@ -0,0 +1,36 @@
 # HELP benchmark_time_per_op Time per operation in nanoseconds
 # TYPE benchmark_time_per_op gauge
 # HELP benchmark_memory_per_op Memory per operation in bytes
 # TYPE benchmark_memory_per_op gauge
 # HELP benchmark_allocs_per_op Allocations per operation
 # TYPE benchmark_allocs_per_op gauge
 benchmark_time_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 43913
 benchmark_allocs_per_op{benchmark="BenchmarkAPIServerCreateJobSimple"} 13526
 benchmark_time_per_op{benchmark="BenchmarkMetricsCollection"} 19.34
 benchmark_allocs_per_op{benchmark="BenchmarkMetricsCollection"} 0
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 105096
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_1"} 26660
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 646391
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_5"} 16738
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 1369525
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_10"} 6036
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 1394439
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_25"} 6546
 benchmark_time_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 1373567
 benchmark_allocs_per_op{benchmark="BenchmarkConcurrentRequests_Concurrency_50"} 6347
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 26726470
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_SmallExperiment"} 657367
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 12430890
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_MediumExperiment"} 794823
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 3863256
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_LargeExperiment"} 1129599
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 32534372
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ConcurrentExperiments"} 1115220
 benchmark_time_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 26962389
 benchmark_allocs_per_op{benchmark="BenchmarkMLExperimentExecution_ExperimentMetrics"} 615818
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 100813
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetCreation"} 2933
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 50.28
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetRetrieval"} 16
 benchmark_time_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 63463
 benchmark_allocs_per_op{benchmark="BenchmarkDatasetOperations_DatasetUpdate"} 1285
--- a/.local-artifacts/run_20251204_232656/report.html
+++ b/.local-artifacts/run_20251204_232656/report.html
@ -0,0 +1,141 @@
 <!DOCTYPE html>
 <html>
 <head>
    <title>Benchmark Report - 20251204_232656</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        table { border-collapse: collapse; width: 100%; }
        th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
        th { background-color: #f2f2f2; }
        .metric { font-family: monospace; }
    </style>
 </head>
 <body>
    <h1>Benchmark Report</h1>
    <p><strong>Run ID:</strong> 20251204_232656</p>
    <p><strong>Date:</strong> Thu Dec  4 18:27:25 EST 2025</p>
    <h2>Results</h2>
    <table>
        <tr>
            <th>Benchmark</th>
            <th>Time (ns/op)</th>
            <th>Memory (B/op)</th>
            <th>Allocs (allocs/op)</th>
        </tr>
        <tr>
            <td class="metric">BenchmarkAPIServerCreateJobSimple-24</td>
            <td>43913</td>
            <td>ns/op</td>
            <td>13526</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMetricsCollection-24</td>
            <td>19.34</td>
            <td>ns/op</td>
            <td>0</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-1-24</td>
            <td>105096</td>
            <td>ns/op</td>
            <td>26660</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-5-24</td>
            <td>646391</td>
            <td>ns/op</td>
            <td>16738</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-10-24</td>
            <td>1369525</td>
            <td>ns/op</td>
            <td>6036</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-25-24</td>
            <td>1394439</td>
            <td>ns/op</td>
            <td>6546</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkConcurrentRequests/Concurrency-50-24</td>
            <td>1373567</td>
            <td>ns/op</td>
            <td>6347</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/SmallExperiment-24</td>
            <td>26726470</td>
            <td>ns/op</td>
            <td>657367</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/MediumExperiment-24</td>
            <td>12430890</td>
            <td>ns/op</td>
            <td>794823</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/LargeExperiment-24</td>
            <td>3863256</td>
            <td>ns/op</td>
            <td>1129599</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ConcurrentExperiments-24</td>
            <td>32534372</td>
            <td>ns/op</td>
            <td>1115220</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkMLExperimentExecution/ExperimentMetrics-24</td>
            <td>26962389</td>
            <td>ns/op</td>
            <td>615818</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetCreation-24</td>
            <td>100813</td>
            <td>ns/op</td>
            <td>2933</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetRetrieval-24</td>
            <td>50.28</td>
            <td>ns/op</td>
            <td>16</td>
        </tr>
        <tr>
            <td class="metric">BenchmarkDatasetOperations/DatasetUpdate-24</td>
            <td>63463</td>
            <td>ns/op</td>
            <td>1285</td>
        </tr>
    </table>
    <h2>Raw Output</h2>
    <pre>goos: darwin
 goarch: arm64
 pkg: github.com/jfraeys/fetch_ml/tests/benchmarks
 cpu: Apple M2 Ultra
 BenchmarkAPIServerCreateJobSimple-24    	   27296	     43913 ns/op	   13526 B/op	      98 allocs/op
 BenchmarkMetricsCollection-24           	61271120	        19.34 ns/op	       0 B/op	       0 allocs/op
 BenchmarkConcurrentRequests/Concurrency-1-24         	   10000	    105096 ns/op	   26660 B/op	     162 allocs/op
 BenchmarkConcurrentRequests/Concurrency-5-24         	   10000	    646391 ns/op	   16738 B/op	     124 allocs/op
 BenchmarkConcurrentRequests/Concurrency-10-24        	     872	   1369525 ns/op	    6036 B/op	      71 allocs/op
 BenchmarkConcurrentRequests/Concurrency-25-24        	     904	   1394439 ns/op	    6546 B/op	      73 allocs/op
 BenchmarkConcurrentRequests/Concurrency-50-24        	     889	   1373567 ns/op	    6347 B/op	      74 allocs/op
 BenchmarkMLExperimentExecution/SmallExperiment-24    	      40	  26726470 ns/op	  657367 B/op	   12352 allocs/op
 BenchmarkMLExperimentExecution/MediumExperiment-24   	      97	  12430890 ns/op	  794823 B/op	    6254 allocs/op
 BenchmarkMLExperimentExecution/LargeExperiment-24    	     346	   3863256 ns/op	 1129599 B/op	    1375 allocs/op
 BenchmarkMLExperimentExecution/ConcurrentExperiments-24         	      36	  32534372 ns/op	 1115220 B/op	   12637 allocs/op
 BenchmarkMLExperimentExecution/ExperimentMetrics-24             	      52	  26962389 ns/op	  615818 B/op	   17884 allocs/op
 BenchmarkDatasetOperations/DatasetCreation-24                   	   12075	    100813 ns/op	    2933 B/op	      75 allocs/op
 BenchmarkDatasetOperations/DatasetRetrieval-24                  	23898721	        50.28 ns/op	      16 B/op	       1 allocs/op
 BenchmarkDatasetOperations/DatasetUpdate-24                     	   18692	     63463 ns/op	    1285 B/op	      36 allocs/op
 PASS
 ok  	github.com/jfraeys/fetch_ml/tests/benchmarks	28.692s</pre>
 </body>
 </html>
--- a/.windsurf/rules/test-new-features.md
+++ b/.windsurf/rules/test-new-features.md
@ -3,4 +3,88 @@ trigger: model_decision
 description: When a new feature is added, this prompt needs to be run
 ---
-When a significant feature is added make sure that the tests are added as well, change the docs to add details and make sure that the scripts, if needed, are changed. Don't forget to cleanup, you tend to leave a lot of unncessary files and code arround. Do not write loose .md to track task and todo, either add to the code or tell me.
+# Development Guidelines
 ## Code Quality Standards
 ### Testing Requirements
 - MANDATORY: Every new feature MUST include corresponding tests
 - Write tests BEFORE implementing complex features (TDD approach)
 - Test coverage for new code should be >80%
 - Include both unit tests and integration tests where applicable
 - Test edge cases, error paths, and boundary conditions
 ### Documentation Standards
 - Update relevant documentation IN THE SAME COMMIT as code changes
 - Documentation locations:
  - README.md: User-facing features, installation, quick start
  - CHANGELOG.md: All changes, following Keep a Changelog format
  - Code comments: Complex logic, non-obvious decisions, API contracts
  - Function/struct docs: Public APIs must have doc comments
 - Use concrete examples in documentation
 - Keep docs concise but complete
 ### Code Organization
 - CRITICAL: Clean up as you go - no orphaned files or dead code
 - Remove commented-out code blocks (use git history instead)
 - Delete unused imports, functions, and variables immediately
 - Consolidate duplicate code into reusable functions
 - Move TODO items from loose files into:
  - Code comments with `// TODO(context):` for implementation tasks
  - GitHub Issues for larger features
  - NEVER create standalone .md files for tracking
 ### When Making Changes
 For EVERY significant change, complete ALL of these:
 1. Write/update tests
 2. Update documentation (README, CHANGELOG, code comments)
 3. Update build scripts if dependencies/build process changed
 4. Remove any temporary/debug code added during development
 5. Delete unused files created during exploration
 6. Verify no dead code remains (unused functions, imports, variables)
 ### Cleanup Checklist (Run BEFORE committing)
 - [ ] Removed all debug print statements
 - [ ] Deleted temporary test files
 - [ ] Removed commented-out code
 - [ ] Cleaned up unused imports
 - [ ] Deleted exploratory/spike code
 - [ ] Consolidated duplicate logic
 - [ ] Removed obsolete scripts/configs
 ### Communication Style
 - Report what you've done: "Added feature X with tests in test/x_test.go"
 - Highlight what needs attention: "WARNING: Manual testing needed for edge case Y"
 - Ask questions directly: "Should we support Z? Trade-offs are..."
 - NEVER say "I'll track this in a markdown file" - use code comments or tell me directly
 ### Script/Build System Updates
 - Update Makefile/build.zig when adding new targets or commands
 - Modify CI/CD configs (.github/workflows) if build/test process changes
 - Update package.json/Cargo.toml/go.mod when dependencies change
 - Document new scripts in README under "Development" section
 ## Anti-Patterns to AVOID
 - Creating notes.md, todo.md, tasks.md, ideas.md files
 - Leaving commented-out code "for reference"
 - Keeping old implementation files with .old or .backup suffixes
 - Adding features without tests
 - Updating code without updating docs
 - Leaving TODO comments without context or assignee
 ## Preferred Patterns
 - Inline TODO comments: `// TODO(user): Add caching layer for better performance`
 - Self-documenting code with clear names
 - Tests that serve as usage examples
 - Incremental, complete commits (code + tests + docs)
 - Direct communication about tasks and priorities
 ## Definition of Done
 A task is complete ONLY when:
 1. Code is written and working
 2. Tests are written and passing
 3. Documentation is updated
 4. All temporary/dead code is removed
 5. Build scripts are updated if needed
 6. Changes are committed with clear message
--- a/136
+++ b/136
@ -1,4 +1,4 @@
-.PHONY: all build clean clean-docs test test-unit test-integration test-e2e test-coverage lint install dev prod setup validate configlint ci-local docs
+.PHONY: all build prod dev clean clean-docs test test-unit test-integration test-e2e test-coverage lint install setup validate configlint ci-local docs benchmark benchmark-local artifacts clean-benchmarks clean-all clean-aggressive status load-test chaos-test profile-tools detect-regressions tech-excellence docker-build docker-run docker-stop docker-logs monitoring-performance monitoring-performance-stop dashboard-performance
 # Default target
 all: build
@ -14,7 +14,7 @@ build:
 # Build production-optimized binaries
 prod:
 	go build -ldflags="-s -w" -o bin/api-server cmd/api-server/main.go
-	go build -ldflags="-s -w" -o bin/worker cmd/worker/worker_server.go
+	go build -ldflags="-s -w" -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go
 	go build -ldflags="-s -w" -o bin/tui ./cmd/tui
 	cd cli && zig build prod && strip zig-out/prod/ml
 	@echo "✓ Production binaries built"
@ -41,7 +41,7 @@ clean-docs:
 # Run tests
 test:
-	go test ./...
+	go test ./tests/...
 	cd cli && zig build test
 	@echo "✓ All tests passed"
@ -80,11 +80,16 @@ configlint:
 		configs/config-no-tls.yaml \
 		configs/config-dev.yaml
 worker-configlint:
 	go run ./cmd/configlint --schema configs/schema/worker_config_schema.yaml \
 		configs/worker-prod.toml
 # Run a local approximation of the CI pipeline
 ci-local:
 	make test
 	make lint
 	make configlint
 	make worker-configlint
 	@echo "Running queue package tests with race detector..."
 	go test -v -race -coverprofile=coverage/queue-coverage.out ./internal/queue/...
 	@echo "Running coverage..."
@ -157,6 +162,115 @@ docs-build:
 	@echo "Building static documentation..."
 	cd docs && mkdocs build
 # Performance benchmarking tools
 benchmark:
 	@echo "Running performance benchmarks..."
 	go test -bench=. -benchmem ./tests/benchmarks/...
 # Run benchmarks locally with artifact management
 benchmark-local:
 	@echo "Running benchmarks locally with full workflow..."
 	./scripts/run-benchmarks-local.sh
 # Manage benchmark artifacts
 artifacts:
 	@echo "Managing benchmark artifacts..."
 	./scripts/manage-artifacts.sh help
 # Clean benchmark artifacts (keep last 10)
 clean-benchmarks:
 	@echo "Cleaning benchmark artifacts..."
 	./scripts/cleanup-benchmarks.sh benchmarks
 # Comprehensive cleanup (keep last 5 runs)
 clean-all:
 	@echo "Running comprehensive cleanup..."
 	./scripts/cleanup-benchmarks.sh all
 # Aggressive cleanup (removes more data)
 clean-aggressive:
 	@echo "Running aggressive cleanup..."
 	./scripts/cleanup-benchmarks.sh aggressive
 # Show disk usage status
 status:
 	@echo "Checking disk usage..."
 	./scripts/cleanup-benchmarks.sh status
 # Start performance monitoring stack
 monitoring-performance:
 	@echo "Starting performance monitoring stack..."
 	cd monitoring && docker-compose -f docker-compose.performance.yml up -d
 	@echo "Grafana available at: http://localhost:3001 (admin/admin)"
 	@echo "Loki available at: http://localhost:3100"
 	@echo "Pushgateway available at: http://localhost:9091"
 	@echo "Quick start guide: docs/src/performance-quick-start.md"
 # Stop performance monitoring stack
 monitoring-performance-stop:
 	@echo "Stopping performance monitoring stack..."
 	cd monitoring && docker-compose -f docker-compose.performance.yml down
 # View performance dashboard
 dashboard-performance:
 	@echo "Opening performance dashboard..."
 	@echo "URL: http://localhost:3001/d/fetchml-performance/fetch-ml-performance-dashboard"
 # Load testing
 load-test:
 	@echo "Running load tests..."
 	go test -v ./tests/load/...
 # CPU profiling for HTTP LoadTestSuite (MediumLoad only for speed)
 profile-load:
 	@echo "CPU profiling MediumLoad HTTP load test..."
 	go test ./tests/load -run TestLoadProfile_Medium -count=1 -cpuprofile cpu_load.out
 	@echo "✓ CPU profile written to cpu_load.out (inspect with: go tool pprof cpu_load.out)"
 profile-load-norate:
 	@echo "CPU profiling MediumLoad HTTP load test (no rate limiting)..."
 	go test ./tests/load -run TestLoadProfile_Medium -count=1 -cpuprofile cpu_load.out -v -args -profile-norate
 	@echo "✓ CPU profile written to cpu_load.out (inspect with: go tool pprof cpu_load.out)"
 # CPU profiling for WebSocket → Redis queue → worker path
 profile-ws-queue:
 	@echo "CPU profiling WebSocket queue integration test..."
 	go test ./tests/integration -run WebSocketQueue -count=5 -cpuprofile cpu_ws.out
 	@echo "✓ CPU profile written to cpu_ws.out (inspect with: go tool pprof cpu_ws.out)"
 # Chaos engineering tests
 chaos-test:
 	@echo "Running chaos engineering tests..."
 	go test -v ./tests/chaos/...
 # Performance profiling tools
 profile-tools:
 	@echo "Building profiling tools..."
 	go build -o bin/performance-regression-detector ./tools/performance_regression_detector.go
 	go build -o bin/profiler ./tools/profiler.go
 # Performance regression detection
 detect-regressions:
 	@echo "Detecting performance regressions..."
 	@if [ ! -f "baseline.json" ]; then \
 		echo "Creating baseline performance metrics..."; \
 		go test -bench=. -benchmem ./tests/benchmarks/... | tee baseline.json; \
 	else \
 		echo "Analyzing current performance against baseline..."; \
 		go test -bench=. -benchmem ./tests/benchmarks/... | tee current.json; \
 		echo "Use tools/performance_regression_detector to analyze results"; \
 	fi
 # Technical excellence suite (runs all performance tests)
 tech-excellence: benchmark load-test chaos-test profile-tools
 	@echo "Technical excellence test suite completed"
 	@echo "Results summary:"
 	@echo "  - Benchmarks: See test output above"
 	@echo "  - Load tests: See test output above"
 	@echo "  - Chaos tests: See test output above"
 	@echo "  - Profiling tools: Built in bin/"
 	@echo "  - Regression detection: Run 'make detect-regressions'"
 # Help
 help:
 	@echo "FetchML Build System"
@ -188,6 +302,22 @@ help:
 	@echo "  make setup-monitoring - Setup monitoring stack (Linux only)"
 	@echo "  make validate         - Validate production configuration"
 	@echo ""
 	@echo "Performance Testing:"
 	@echo "  make benchmark         - Run performance benchmarks"
 	@echo "  make benchmark-local   - Run benchmarks locally with artifact management"
 	@echo "  make artifacts         - Manage benchmark artifacts (list, clean, compare, export)"
 	@echo "  make clean-benchmarks  - Clean benchmark artifacts (keep last 10)"
 	@echo "  make clean-all         - Comprehensive cleanup (keep last 5 runs)"
 	@echo "  make clean-aggressive  - Aggressive cleanup (removes more data)"
 	@echo "  make status            - Show disk usage status"
 	@echo "  make load-test         - Run load testing suite"
 	@echo "  make profile-load      - CPU profile MediumLoad HTTP test suite"
 	@echo "  make profile-ws-queue  - CPU profile WebSocket→queue→worker path"
 	@echo "  make chaos-test        - Run chaos engineering tests"
 	@echo "  make profile-tools     - Build performance profiling tools"
 	@echo "  make detect-regressions - Detect performance regressions"
 	@echo "  make tech-excellence   - Run complete technical excellence suite"
 	@echo ""
 	@echo "Documentation:"
 	@echo "  make docs-setup       - Install MkDocs and dependencies"
 	@echo "  make docs             - Start MkDocs development server with live reload"
--- a/build/docker/full-prod.Dockerfile
+++ b/build/docker/full-prod.Dockerfile
@ -0,0 +1,73 @@
 # Full Production Dockerfile with Podman and SSH
 FROM golang:1.25-alpine AS builder
 # Install dependencies
 RUN apk add --no-cache git make
 # Set working directory
 WORKDIR /app
 # Copy go mod files
 COPY go.mod go.sum ./
 # Download dependencies
 RUN go mod download
 # Copy source code
 COPY . .
 # Build Go binaries
 RUN go build -o bin/api-server cmd/api-server/main.go && \
    go build -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go
 # Final stage with Podman
 FROM alpine:3.19
 # Install runtime dependencies including Podman and SSH
 RUN apk add --no-cache ca-certificates redis openssl curl podman openssh
 # Create app user
 RUN addgroup -g 1001 -S appgroup && \
    adduser -u 1001 -S appuser -G appgroup
 # Set working directory
 WORKDIR /app
 # Copy binaries from builder
 COPY --from=builder /app/bin/ /usr/local/bin/
 # Copy configs
 COPY --from=builder /app/configs/ /app/configs/
 # Create necessary directories
 RUN mkdir -p /app/data/experiments /app/logs /app/ssl /app/ssh /tmp/fetchml-jobs
 # Generate SSL certificates
 RUN openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
    -subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
    chmod 644 /app/ssl/cert.pem /app/ssl/key.pem
 # Generate SSH keys for container communication
 RUN ssh-keygen -t rsa -b 2048 -f /app/ssh/id_rsa -N "" && \
    cp /app/ssh/id_rsa.pub /app/ssh/authorized_keys && \
    chmod 600 /app/ssh/id_rsa && \
    chmod 644 /app/ssh/id_rsa.pub /app/ssh/authorized_keys
 # Configure SSH daemon
 RUN echo "PermitRootLogin yes" >> /etc/ssh/sshd_config && \
    echo "PasswordAuthentication no" >> /etc/ssh/sshd_config && \
    echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config && \
    echo "AuthorizedKeysFile /app/ssh/authorized_keys" >> /etc/ssh/sshd_config
 # Switch to app user
 USER appuser
 # Expose ports
 EXPOSE 9101 22
 # Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
  CMD curl -k -f https://localhost:9101/health || exit 1
 # Default command for API server
 CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
--- a/build/docker/homelab-secure.Dockerfile
+++ b/build/docker/homelab-secure.Dockerfile
@ -0,0 +1,147 @@
 # Homelab Secure Production Dockerfile
 FROM golang:1.25-alpine AS builder
 # Install dependencies
 RUN apk add --no-cache git make
 # Set working directory
 WORKDIR /app
 # Copy go mod files
 COPY go.mod go.sum ./
 # Download dependencies
 RUN go mod download
 # Copy source code
 COPY . .
 # Build Go binaries
 RUN go build -o bin/api-server cmd/api-server/main.go && \
    go build -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go
 # Final stage with security hardening
 FROM alpine:3.19
 # Install security packages and runtime dependencies
 RUN apk add --no-cache \
    ca-certificates \
    redis \
    openssl \
    curl \
    podman \
    openssh \
    sudo \
    fail2ban \
    logrotate \
    && rm -rf /var/cache/apk/*
 # Create app user and worker user with no shell by default
 RUN addgroup -g 1001 -S appgroup && \
    adduser -u 1001 -S appuser -G appgroup -s /sbin/nologin && \
    addgroup -g 1002 -S workergroup && \
    adduser -u 1002 -S worker -G workergroup -s /bin/sh && \
    echo "worker:HomelabWorker2024!" | chpasswd && \
    mkdir -p /home/worker/.ssh && \
    chown -R worker:workergroup /home/worker
 # Set working directory
 WORKDIR /app
 # Copy binaries from builder
 COPY --from=builder /app/bin/ /usr/local/bin/
 # Copy configs
 COPY --from=builder /app/configs/ /app/configs/
 # Create necessary directories with proper permissions
 RUN mkdir -p /app/data/experiments /app/logs /app/ssl /tmp/fetchml-jobs && \
    chown -R appuser:appgroup /app && \
    chmod 750 /app/data/experiments /app/logs
 # Generate SSL certificates with stronger crypto
 RUN openssl req -x509 -newkey rsa:4096 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
    -subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
    chmod 600 /app/ssl/key.pem && \
    chmod 644 /app/ssl/cert.pem
 # Generate SSH keys with stronger crypto
 RUN ssh-keygen -t rsa -b 4096 -f /home/worker/.ssh/id_rsa -N "" && \
    cp /home/worker/.ssh/id_rsa.pub /home/worker/.ssh/authorized_keys && \
    chmod 700 /home/worker/.ssh && \
    chmod 600 /home/worker/.ssh/id_rsa && \
    chmod 644 /home/worker/.ssh/id_rsa.pub /home/worker/.ssh/authorized_keys && \
    chown -R worker:workergroup /home/worker/.ssh
 # Configure SSH with security hardening
 RUN echo "Port 2222" >> /etc/ssh/sshd_config && \
    echo "PermitRootLogin no" >> /etc/ssh/sshd_config && \
    echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \
    echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config && \
    echo "AuthorizedKeysFile %h/.ssh/authorized_keys" >> /etc/ssh/sshd_config && \
    echo "AllowUsers worker" >> /etc/ssh/sshd_config && \
    echo "MaxAuthTries 3" >> /etc/ssh/sshd_config && \
    echo "ClientAliveInterval 300" >> /etc/ssh/sshd_config && \
    echo "ClientAliveCountMax 2" >> /etc/ssh/sshd_config && \
    echo "X11Forwarding no" >> /etc/ssh/sshd_config && \
    echo "AllowTcpForwarding no" >> /etc/ssh/sshd_config && \
    echo "Banner /etc/ssh/banner" >> /etc/ssh/sshd_config && \
    echo "Protocol 2" >> /etc/ssh/sshd_config && \
    echo "Ciphers chacha20-poly1305@openssh.com,aes256-gcm@openssh.com,aes128-gcm@openssh.com" >> /etc/ssh/sshd_config && \
    echo "MACs hmac-sha2-256-etm@openssh.com,hmac-sha2-512-etm@openssh.com,hmac-sha2-256,hmac-sha2-512" >> /etc/ssh/sshd_config && \
    echo "KexAlgorithms curve25519-sha256@libssh.org,diffie-hellman-group16-sha512" >> /etc/ssh/sshd_config
 # Create SSH banner
 RUN echo "=================================================" > /etc/ssh/banner && \
    echo "  ML Experiments Homelab Server" >> /etc/ssh/banner && \
    echo "  Unauthorized access is prohibited" >> /etc/ssh/banner && \
    echo "  All connections are monitored and logged" >> /etc/ssh/banner && \
    echo "=================================================" >> /etc/ssh/banner
 # Generate SSH host keys
 RUN ssh-keygen -A
 # Configure fail2ban for SSH protection
 RUN echo "[DEFAULT]" > /etc/fail2ban/jail.local && \
    echo "bantime = 3600" >> /etc/fail2ban/jail.local && \
    echo "findtime = 600" >> /etc/fail2ban/jail.local && \
    echo "maxretry = 3" >> /etc/fail2ban/jail.local && \
    echo "" >> /etc/fail2ban/jail.local && \
    echo "[sshd]" >> /etc/fail2ban/jail.local && \
    echo "enabled = true" >> /etc/fail2ban/jail.local && \
    echo "port = 2222" >> /etc/fail2ban/jail.local && \
    echo "filter = sshd" >> /etc/fail2ban/jail.local && \
    echo "logpath = /var/log/messages" >> /etc/fail2ban/jail.local
 # Configure sudo with restricted access
 RUN echo "appuser ALL=(ALL) NOPASSWD: /app/start-security.sh" >> /etc/sudoers && \
    echo "appuser ALL=(ALL) NOPASSWD: /usr/sbin/sshd" >> /etc/sudoers && \
    echo "appuser ALL=(ALL) NOPASSWD: /usr/bin/ssh-keygen" >> /etc/sudoers && \
    echo "worker ALL=(ALL) NOPASSWD: /usr/bin/podman" >> /etc/sudoers && \
    echo "Defaults:appuser !requiretty" >> /etc/sudoers && \
    echo "Defaults:worker !requiretty" >> /etc/sudoers && \
    echo "Defaults:appuser !lecture" >> /etc/sudoers && \
    echo "Defaults:worker !lecture" >> /etc/sudoers
 # Security hardening - remove setuid binaries except sudo
 RUN find / -perm /4000 -type f -not -path "/usr/bin/sudo" -exec chmod 755 {} \; 2>/dev/null || true
 # Create startup script for security services
 RUN echo "#!/bin/sh" > /app/start-security.sh && \
    echo "ssh-keygen -A" >> /app/start-security.sh && \
    echo "/usr/sbin/sshd -D -p 2222" >> /app/start-security.sh && \
    echo "# End of security services" >> /app/start-security.sh && \
    chmod 755 /app/start-security.sh
 # Switch to app user for application
 USER appuser
 # Expose ports
 EXPOSE 9101 2222
 # Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
  CMD curl -k -f https://localhost:9101/health || exit 1
 # Default command for API server
 CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
--- a/build/docker/secure-prod.Dockerfile
+++ b/build/docker/secure-prod.Dockerfile
@ -0,0 +1,102 @@
 # Secure Production Dockerfile with proper SSH setup
 FROM golang:1.25-alpine AS builder
 # Install dependencies
 RUN apk add --no-cache git make gcc musl-dev
 # Set working directory
 WORKDIR /app
 # Copy go mod files
 COPY go.mod go.sum ./
 # Download dependencies
 RUN go mod download
 # Copy source code
 COPY . .
 # Build Go binaries with CGO enabled for SQLite
 RUN CGO_ENABLED=1 go build -o bin/api-server cmd/api-server/main.go && \
    CGO_ENABLED=1 go build -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go
 # Final stage with Podman and secure SSH
 FROM alpine:3.19
 # Install runtime dependencies including Podman and SSH
 RUN apk add --no-cache ca-certificates redis openssl curl podman openssh sudo gcc musl-dev
 # Create app user and worker user
 RUN addgroup -g 1001 -S appgroup && \
    adduser -u 1001 -S appuser -G appgroup && \
    addgroup -g 1002 -S workergroup && \
    adduser -u 1002 -S worker -G workergroup -s /bin/sh && \
    echo "worker:SecureWorkerPass2024!" | chpasswd && \
    mkdir -p /home/worker/.ssh && \
    chown -R worker:workergroup /home/worker
 # Set working directory
 WORKDIR /app
 # Copy binaries from builder
 COPY --from=builder /app/bin/ /usr/local/bin/
 # Copy configs
 COPY --from=builder /app/configs/ /app/configs/
 # Create necessary directories
 RUN mkdir -p /app/data/experiments /app/logs /app/ssl /tmp/fetchml-jobs && \
    chown -R appuser:appgroup /app
 # Generate SSL certificates
 RUN openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
    -subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
    chmod 644 /app/ssl/cert.pem /app/ssl/key.pem
 # Generate SSH keys for worker user
 RUN ssh-keygen -t rsa -b 4096 -f /home/worker/.ssh/id_rsa -N "" && \
    cp /home/worker/.ssh/id_rsa.pub /home/worker/.ssh/authorized_keys && \
    chmod 700 /home/worker/.ssh && \
    chmod 600 /home/worker/.ssh/id_rsa && \
    chmod 644 /home/worker/.ssh/id_rsa.pub /home/worker/.ssh/authorized_keys && \
    chown -R worker:workergroup /home/worker/.ssh
 # Configure SSH daemon securely
 RUN echo "Port 2222" >> /etc/ssh/sshd_config && \
    echo "PermitRootLogin no" >> /etc/ssh/sshd_config && \
    echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config && \
    echo "PubkeyAuthentication yes" >> /etc/ssh/sshd_config && \
    echo "AuthorizedKeysFile %h/.ssh/authorized_keys" >> /etc/ssh/sshd_config && \
    echo "AllowUsers worker" >> /etc/ssh/sshd_config && \
    echo "MaxAuthTries 3" >> /etc/ssh/sshd_config && \
    echo "ClientAliveInterval 300" >> /etc/ssh/sshd_config && \
    echo "ClientAliveCountMax 2" >> /etc/ssh/sshd_config && \
    echo "X11Forwarding no" >> /etc/ssh/sshd_config && \
    echo "AllowTcpForwarding no" >> /etc/ssh/sshd_config && \
    echo "Banner /etc/ssh/banner" >> /etc/ssh/sshd_config
 # Create SSH banner
 RUN echo "=================================================" > /etc/ssh/banner && \
    echo "  ML Experiments Production Server" >> /etc/ssh/banner && \
    echo "  Unauthorized access is prohibited" >> /etc/ssh/banner && \
    echo "=================================================" >> /etc/ssh/banner
 # Generate SSH host keys
 RUN ssh-keygen -A
 # Give appuser sudo permissions for SSH and worker user for Podman
 RUN echo "appuser ALL=(ALL) NOPASSWD: /usr/sbin/sshd" >> /etc/sudoers && \
    echo "worker ALL=(ALL) NOPASSWD: /usr/bin/podman" >> /etc/sudoers
 # Switch to app user for application
 USER appuser
 # Expose ports
 EXPOSE 9101 2222
 # Health check
 HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
  CMD curl -k -f https://localhost:9101/health || exit 1
 # Default command for API server
 CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
--- a/cli/README.md
+++ b/cli/README.md
@ -19,12 +19,28 @@ zig build
 - `ml init` - Setup configuration
 - `ml sync <path>` - Sync project to server  
- `ml queue <job>` - Queue job for execution
+- `ml queue <job1> [job2 ...] [--commit <id>] [--priority N]` - Queue one or more jobs
- `ml status` - Check system status
+- `ml status` - Check system/queue status for your API key
- `ml monitor` - Launch monitoring interface
+- `ml monitor` - Launch monitoring interface (TUI)
- `ml cancel <job>` - Cancel running job
+- `ml cancel <job>` - Cancel a running/queued job you own
 - `ml prune --keep N` - Keep N recent experiments
 - `ml watch <path>` - Auto-sync directory
 - `ml experiment log|show|list|delete` - Manage experiments and metrics
 ### Experiment workflow (minimal)
 - `ml sync ./my-experiment --queue`  
  Syncs files, computes a unique commit ID for the directory, and queues a job.
 - `ml queue my-job`  
  Queues a job named `my-job`. If `--commit` is omitted, the CLI generates a random commit ID
  and records `(job_name, commit_id)` in `~/.ml/history.log` so you don't have to remember hashes.
 - `ml experiment list`  
  Shows recent experiments from history with alias (job name) and commit ID.
 - `ml experiment delete <alias|commit>`  
  Cancels a running/queued experiment by job name, full commit ID, or short commit prefix.
 ## Configuration
--- a/cli/src/commands/experiment.zig
+++ b/cli/src/commands/experiment.zig
@ -2,13 +2,18 @@ const std = @import("std");
 const config = @import("../config.zig");
 const ws = @import("../net/ws.zig");
 const protocol = @import("../net/protocol.zig");
 const history = @import("../utils/history.zig");
 const colors = @import("../utils/colors.zig");
 const cancel_cmd = @import("cancel.zig");
 pub fn execute(allocator: std.mem.Allocator, args: []const []const u8) !void {
    if (args.len < 1) {
        std.debug.print("Usage: ml experiment <command> [args]\n", .{});
        std.debug.print("Commands:\n", .{});
-        std.debug.print("  log   Log a metric\n", .{});
+        std.debug.print("  log      Log a metric\n", .{});
-        std.debug.print("  show  Show experiment details\n", .{});
+        std.debug.print("  show     Show experiment details\n", .{});
        std.debug.print("  list     List recent experiments (alias + commit)\n", .{});
        std.debug.print("  delete   Cancel a running experiment by alias or commit\n", .{});
        return;
    }
@ -18,6 +23,14 @@ pub fn execute(allocator: std.mem.Allocator, args: []const []const u8) !void {
        try executeLog(allocator, args[1..]);
    } else if (std.mem.eql(u8, command, "show")) {
        try executeShow(allocator, args[1..]);
    } else if (std.mem.eql(u8, command, "list")) {
        try executeList(allocator);
    } else if (std.mem.eql(u8, command, "delete")) {
        if (args.len < 2) {
            std.debug.print("Usage: ml experiment delete <alias|commit>\n", .{});
            return;
        }
        try executeDelete(allocator, args[1]);
    } else {
        std.debug.print("Unknown command: {s}\n", .{command});
    }
@ -190,3 +203,62 @@ fn executeShow(allocator: std.mem.Allocator, args: []const []const u8) !void {
        },
    }
 }
 fn executeList(allocator: std.mem.Allocator) !void {
    const entries = history.loadEntries(allocator) catch |err| {
        colors.printError("Failed to read experiment history: {}\n", .{err});
        return err;
    };
    defer history.freeEntries(allocator, entries);
    if (entries.len == 0) {
        colors.printWarning("No experiments recorded yet. Use `ml sync --queue` or `ml queue` to submit one.\n", .{});
        return;
    }
    colors.printInfo("\nRecent Experiments (latest first):\n", .{});
    colors.printInfo("---------------------------------\n", .{});
    const max_display = if (entries.len > 20) 20 else entries.len;
    var idx: usize = 0;
    while (idx < max_display) : (idx += 1) {
        const entry = entries[entries.len - idx - 1];
        std.debug.print("{d:2}) Alias: {s}\n", .{ idx + 1, entry.job_name });
        std.debug.print("     Commit: {s}\n", .{entry.commit_id});
        std.debug.print("     Queued: {d}\n\n", .{entry.queued_at});
    }
    if (entries.len > max_display) {
        colors.printInfo("...and {d} more\n", .{entries.len - max_display});
    }
 }
 fn executeDelete(allocator: std.mem.Allocator, identifier: []const u8) !void {
    const resolved = try resolveJobIdentifier(allocator, identifier);
    defer allocator.free(resolved);
    const args = [_][]const u8{resolved};
    cancel_cmd.run(allocator, &args) catch |err| {
        colors.printError("Failed to cancel experiment '{s}': {}\n", .{ resolved, err });
        return err;
    };
 }
 fn resolveJobIdentifier(allocator: std.mem.Allocator, identifier: []const u8) ![]const u8 {
    const entries = history.loadEntries(allocator) catch {
        return allocator.dupe(u8, identifier);
    };
    defer history.freeEntries(allocator, entries);
    for (entries) |entry| {
        if (std.mem.eql(u8, identifier, entry.job_name) or
            std.mem.eql(u8, identifier, entry.commit_id) or
            (identifier.len <= entry.commit_id.len and
                std.mem.eql(u8, entry.commit_id[0..identifier.len], identifier)))
        {
            return allocator.dupe(u8, entry.job_name);
        }
    }
    return allocator.dupe(u8, identifier);
 }
--- a/cli/src/commands/queue.zig
+++ b/cli/src/commands/queue.zig
@ -3,6 +3,8 @@ const Config = @import("../config.zig").Config;
 const ws = @import("../net/ws.zig");
 const crypto = @import("../utils/crypto.zig");
 const colors = @import("../utils/colors.zig");
 const history = @import("../utils/history.zig");
 const stdcrypto = std.crypto;
 pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    if (args.len == 0) {
@ -17,7 +19,7 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    };
    defer job_names.deinit(allocator);
-    var commit_id: ?[]const u8 = null;
+    var commit_id_override: ?[]const u8 = null;
    var priority: u8 = 5;
    // Parse arguments - separate job names from flags
@ -28,7 +30,10 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
        if (std.mem.startsWith(u8, arg, "--")) {
            // Parse flags
            if (std.mem.eql(u8, arg, "--commit") and i + 1 < args.len) {
-                commit_id = args[i + 1];
+                if (commit_id_override != null) {
                    allocator.free(commit_id_override.?);
                }
                commit_id_override = try allocator.dupe(u8, args[i + 1]);
                i += 1;
            } else if (std.mem.eql(u8, arg, "--priority") and i + 1 < args.len) {
                priority = try std.fmt.parseInt(u8, args[i + 1], 10);
@ -58,10 +63,12 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    };
    defer failed_jobs.deinit(allocator);
    defer if (commit_id_override) |cid| allocator.free(cid);
    for (job_names.items, 0..) |job_name, index| {
        colors.printProgress("Processing job {d}/{d}: {s}\n", .{ index + 1, job_names.items.len, job_name });
-        queueSingleJob(allocator, job_name, commit_id, priority) catch |err| {
+        queueSingleJob(allocator, job_name, commit_id_override, priority) catch |err| {
            colors.printError("Failed to queue job '{s}': {}\n", .{ job_name, err });
            failed_jobs.append(allocator, job_name) catch |append_err| {
                colors.printError("Failed to track failed job: {}\n", .{append_err});
@ -85,11 +92,26 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    }
 }
-fn queueSingleJob(allocator: std.mem.Allocator, job_name: []const u8, commit_id: ?[]const u8, priority: u8) !void {
+fn generateCommitID(allocator: std.mem.Allocator) ![]const u8 {
-    if (commit_id == null) {
+    var bytes: [32]u8 = undefined;
-        colors.printError("Error: --commit is required\n", .{});
+    stdcrypto.random.bytes(&bytes);
-        return error.MissingCommit;
+
    var commit = try allocator.alloc(u8, 64);
    const hex = "0123456789abcdef";
    for (bytes, 0..) |b, idx| {
        commit[idx * 2] = hex[(b >> 4) & 0xF];
        commit[idx * 2 + 1] = hex[b & 0xF];
    }
    return commit;
 }
 fn queueSingleJob(allocator: std.mem.Allocator, job_name: []const u8, commit_override: ?[]const u8, priority: u8) !void {
    const commit_id = blk: {
        if (commit_override) |cid| break :blk cid;
        const generated = try generateCommitID(allocator);
        break :blk generated;
    };
    defer if (commit_override == null) allocator.free(commit_id);
    const config = try Config.load(allocator);
    defer {
@ -97,22 +119,24 @@ fn queueSingleJob(allocator: std.mem.Allocator, job_name: []const u8, commit_id:
        mut_config.deinit(allocator);
    }
-    colors.printInfo("Queueing job '{s}' with commit {s}...\n", .{ job_name, commit_id.? });
+    colors.printInfo("Queueing job '{s}' with commit {s}...\n", .{ job_name, commit_id });
-    // Use plain password for WebSocket authentication, hash for binary protocol
+    // API key is already hashed in config, use as-is
-    const api_key_plain = config.api_key; // Plain password from config
+    const api_key_hash = config.api_key;
    const api_key_hash = try crypto.hashString(allocator, api_key_plain);
    defer allocator.free(api_key_hash);
    // Connect to WebSocket and send queue message
-    const ws_url = try std.fmt.allocPrint(allocator, "ws://{s}:9101/ws", .{config.worker_host});
+    const ws_url = try std.fmt.allocPrint(allocator, "ws://{s}:9103/ws", .{config.worker_host});
    defer allocator.free(ws_url);
-    var client = try ws.Client.connect(allocator, ws_url, api_key_plain);
+    var client = try ws.Client.connect(allocator, ws_url, api_key_hash);
    defer client.close();
-    try client.sendQueueJob(job_name, commit_id.?, priority, api_key_hash);
+    try client.sendQueueJob(job_name, commit_id, priority, api_key_hash);
    // Receive structured response
    try client.receiveAndHandleResponse(allocator, "Job queue");
    history.record(allocator, job_name, commit_id) catch |err| {
        colors.printWarning("Warning: failed to record job in history ({})\n", .{err});
    };
 }
--- a/cli/src/commands/status.zig
+++ b/cli/src/commands/status.zig
@ -17,7 +17,7 @@ const UserContext = struct {
 fn authenticateUser(allocator: std.mem.Allocator, config: Config) !UserContext {
    // Validate API key by making a simple API call to the server
-    const ws_url = try std.fmt.allocPrint(allocator, "ws://{s}:9101/ws", .{config.worker_host});
+    const ws_url = try std.fmt.allocPrint(allocator, "ws://{s}:9103/ws", .{config.worker_host});
    defer allocator.free(ws_url);
    // Try to connect with the API key to validate it
@ -65,18 +65,16 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    var user_context = try authenticateUser(allocator, config);
    defer user_context.deinit();
-    // Use plain password for WebSocket authentication, compute hash for binary protocol
+    // API key is already hashed in config, use as-is
-    const api_key_plain = config.api_key; // Plain password from config
+    const api_key_hash = config.api_key;
    const api_key_hash = try crypto.hashString(allocator, api_key_plain);
    defer allocator.free(api_key_hash);
    // Connect to WebSocket and request status
-    const ws_url = std.fmt.allocPrint(allocator, "ws://{s}:9101/ws", .{config.worker_host}) catch |err| {
+    const ws_url = std.fmt.allocPrint(allocator, "ws://{s}:9103/ws", .{config.worker_host}) catch |err| {
        return err;
    };
    defer allocator.free(ws_url);
-    var client = ws.Client.connect(allocator, ws_url, api_key_plain) catch |err| {
+    var client = ws.Client.connect(allocator, ws_url, api_key_hash) catch |err| {
        switch (err) {
            error.ConnectionRefused => return error.ConnectionFailed,
            error.NetworkUnreachable => return error.ServerUnreachable,
@ -86,9 +84,7 @@ pub fn run(allocator: std.mem.Allocator, args: []const []const u8) !void {
    };
    defer client.close();
-    client.sendStatusRequest(api_key_hash) catch {
+    try client.sendStatusRequest(api_key_hash);
        return error.RequestFailed;
    };
    // Receive and display user-filtered response
    try client.receiveAndHandleStatusResponse(allocator, user_context);
--- a/cli/src/net/ws.zig
+++ b/cli/src/net/ws.zig
@ -125,7 +125,7 @@ pub const Client = struct {
        const key = try generateWebSocketKey(allocator);
        defer allocator.free(key);
-        // Send handshake request with API key authentication
+        // API key is already hashed in config, send as-is
        const request = try std.fmt.allocPrint(allocator, "GET {s} HTTP/1.1\r\n" ++
            "Host: {s}\r\n" ++
            "Upgrade: websocket\r\n" ++
@ -427,15 +427,40 @@ pub const Client = struct {
    /// Receive and handle status response with user filtering
    pub fn receiveAndHandleStatusResponse(self: *Client, allocator: std.mem.Allocator, user_context: anytype) !void {
        _ = user_context; // TODO: Use for filtering
        const message = try self.receiveMessage(allocator);
        defer allocator.free(message);
-        // For now, just display a simple success message with user context
+        // Check if message is JSON or plain text
-        // TODO: Parse JSON response and display user-filtered jobs
+        if (message[0] == '{') {
-        std.debug.print("Status retrieved for user: {s}\n", .{user_context.name});
+            // Parse JSON response
            const parsed = try std.json.parseFromSlice(std.json.Value, allocator, message, .{});
            defer parsed.deinit();
            const root = parsed.value.object;
-        // Display basic status summary
+            // Display user info
-        std.debug.print("Your jobs will be displayed here\n", .{});
+            if (root.get("user")) |user_obj| {
                const user = user_obj.object;
                const name = user.get("name").?.string;
                const admin = user.get("admin").?.bool;
                std.debug.print("Status retrieved for user: {s} (admin: {})\n", .{ name, admin });
            }
            // Display task summary
            if (root.get("tasks")) |tasks_obj| {
                const tasks = tasks_obj.object;
                const total = tasks.get("total").?.integer;
                const queued = tasks.get("queued").?.integer;
                const running = tasks.get("running").?.integer;
                const failed = tasks.get("failed").?.integer;
                const completed = tasks.get("completed").?.integer;
                std.debug.print("Tasks: {d} total, {d} queued, {d} running, {d} failed, {d} completed\n", .{ total, queued, running, failed, completed });
            }
        } else {
            // Handle plain text response
            std.debug.print("Server response: {s}\n", .{message});
            return;
        }
    }
    /// Receive and handle cancel response with user permissions
--- a/cli/src/utils/history.zig
+++ b/cli/src/utils/history.zig
@ -0,0 +1,101 @@
 const std = @import("std");
 pub const Entry = struct {
    job_name: []const u8,
    commit_id: []const u8,
    queued_at: i64,
 };
 fn historyDir(allocator: std.mem.Allocator) ![]const u8 {
    const home = std.posix.getenv("HOME") orelse return error.NoHomeDir;
    return std.fmt.allocPrint(allocator, "{s}/.ml", .{home});
 }
 fn historyPath(allocator: std.mem.Allocator) ![]const u8 {
    const dir = try historyDir(allocator);
    defer allocator.free(dir);
    return std.fmt.allocPrint(allocator, "{s}/history.log", .{dir});
 }
 pub fn record(allocator: std.mem.Allocator, job_name: []const u8, commit_id: []const u8) !void {
    const dir = try historyDir(allocator);
    defer allocator.free(dir);
    std.fs.makeDirAbsolute(dir) catch |err| {
        if (err != error.PathAlreadyExists) return err;
    };
    const path = try historyPath(allocator);
    defer allocator.free(path);
    var file = std.fs.openFileAbsolute(path, .{ .mode = .read_write }) catch |err| switch (err) {
        error.FileNotFound => try std.fs.createFileAbsolute(path, .{}),
        else => return err,
    };
    defer file.close();
    // Append at end of file
    try file.seekFromEnd(0);
    const ts = std.time.timestamp();
    // Format one line into a temporary buffer
    const line = try std.fmt.allocPrint(
        allocator,
        "{d}\t{s}\t{s}\n",
        .{ ts, job_name, commit_id },
    );
    defer allocator.free(line);
    try file.writeAll(line);
 }
 pub fn loadEntries(allocator: std.mem.Allocator) ![]Entry {
    const path = historyPath(allocator) catch |err| switch (err) {
        error.NoHomeDir => return error.NoHomeDir,
        else => return err,
    };
    defer allocator.free(path);
    const file = std.fs.openFileAbsolute(path, .{}) catch |err| switch (err) {
        error.FileNotFound => return &.{},
        else => return err,
    };
    defer file.close();
    const contents = try file.readToEndAlloc(allocator, 1024 * 1024);
    defer allocator.free(contents);
    var entries = std.ArrayListUnmanaged(Entry){};
    defer entries.deinit(allocator);
    var it = std.mem.splitScalar(u8, contents, '\n');
    while (it.next()) |line_full| {
        const line = std.mem.trim(u8, line_full, " \t\r");
        if (line.len == 0) continue;
        var parts = std.mem.splitScalar(u8, line, '\t');
        const ts_str = parts.next() orelse continue;
        const job = parts.next() orelse continue;
        const commit = parts.next() orelse continue;
        const ts = std.fmt.parseInt(i64, ts_str, 10) catch continue;
        const job_dup = try allocator.dupe(u8, job);
        const commit_dup = try allocator.dupe(u8, commit);
        try entries.append(allocator, Entry{
            .job_name = job_dup,
            .commit_id = commit_dup,
            .queued_at = ts,
        });
    }
    return try entries.toOwnedSlice(allocator);
 }
 pub fn freeEntries(allocator: std.mem.Allocator, entries: []Entry) void {
    for (entries) |entry| {
        allocator.free(entry.job_name);
        allocator.free(entry.commit_id);
    }
    allocator.free(entries);
 }
--- a/cmd/api-server/main.go
+++ b/cmd/api-server/main.go
@ -1,3 +1,4 @@
 // Package main implements the fetch_ml API server
 package main
 import (
@ -17,6 +18,7 @@ import (
 	"github.com/jfraeys/fetch_ml/internal/auth"
 	"github.com/jfraeys/fetch_ml/internal/config"
 	"github.com/jfraeys/fetch_ml/internal/experiment"
 	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"github.com/jfraeys/fetch_ml/internal/logging"
 	"github.com/jfraeys/fetch_ml/internal/middleware"
 	"github.com/jfraeys/fetch_ml/internal/queue"
@ -24,17 +26,19 @@ import (
 	"gopkg.in/yaml.v3"
 )
-// Config structure matching worker config
+// Config structure matching worker config.
 type Config struct {
-	BasePath string          `yaml:"base_path"`
+	BasePath  string                `yaml:"base_path"`
-	Auth     auth.AuthConfig `yaml:"auth"`
+	Auth      auth.Config           `yaml:"auth"`
-	Server   ServerConfig    `yaml:"server"`
+	Server    ServerConfig          `yaml:"server"`
-	Security SecurityConfig  `yaml:"security"`
+	Security  SecurityConfig        `yaml:"security"`
-	Redis    RedisConfig     `yaml:"redis"`
+	Redis     RedisConfig           `yaml:"redis"`
-	Database DatabaseConfig  `yaml:"database"`
+	Database  DatabaseConfig        `yaml:"database"`
-	Logging  logging.Config  `yaml:"logging"`
+	Logging   logging.Config        `yaml:"logging"`
 	Resources config.ResourceConfig `yaml:"resources"`
 }
 // RedisConfig holds Redis connection configuration.
 type RedisConfig struct {
 	Addr     string `yaml:"addr"`
 	Password string `yaml:"password"`
@ -42,6 +46,7 @@ type RedisConfig struct {
 	URL      string `yaml:"url"`
 }
 // DatabaseConfig holds database connection configuration.
 type DatabaseConfig struct {
 	Type       string `yaml:"type"`
 	Connection string `yaml:"connection"`
@ -52,37 +57,43 @@ type DatabaseConfig struct {
 	Database   string `yaml:"database"`
 }
 // SecurityConfig holds security-related configuration.
 type SecurityConfig struct {
 	RateLimit     RateLimitConfig `yaml:"rate_limit"`
 	IPWhitelist   []string        `yaml:"ip_whitelist"`
 	FailedLockout LockoutConfig   `yaml:"failed_login_lockout"`
 }
 // RateLimitConfig holds rate limiting configuration.
 type RateLimitConfig struct {
 	Enabled           bool `yaml:"enabled"`
 	RequestsPerMinute int  `yaml:"requests_per_minute"`
 	BurstSize         int  `yaml:"burst_size"`
 }
 // LockoutConfig holds failed login lockout configuration.
 type LockoutConfig struct {
 	Enabled         bool   `yaml:"enabled"`
 	MaxAttempts     int    `yaml:"max_attempts"`
 	LockoutDuration string `yaml:"lockout_duration"`
 }
 // ServerConfig holds server configuration.
 type ServerConfig struct {
 	Address string    `yaml:"address"`
 	TLS     TLSConfig `yaml:"tls"`
 }
 // TLSConfig holds TLS configuration.
 type TLSConfig struct {
 	Enabled  bool   `yaml:"enabled"`
 	CertFile string `yaml:"cert_file"`
 	KeyFile  string `yaml:"key_file"`
 }
 // LoadConfig loads configuration from a YAML file.
 func LoadConfig(path string) (*Config, error) {
-	data, err := os.ReadFile(path)
+	data, err := fileutil.SecureFileRead(path)
 	if err != nil {
 		return nil, err
 	}
@ -95,69 +106,128 @@ func LoadConfig(path string) (*Config, error) {
 }
 func main() {
 	// Parse flags
 	configFile := flag.String("config", "configs/config-local.yaml", "Configuration file path")
 	apiKey := flag.String("api-key", "", "API key for authentication")
 	flag.Parse()
-	// Load config
+	cfg, err := loadServerConfig(*configFile)
 	resolvedConfig, err := config.ResolveConfigPath(*configFile)
 	if err != nil {
 		log.Fatalf("Failed to resolve config: %v", err)
 	}
 	cfg, err := LoadConfig(resolvedConfig)
 	if err != nil {
 		log.Fatalf("Failed to load config: %v", err)
 	}
-	// Ensure log directory exists
+	if err := ensureLogDirectory(cfg.Logging); err != nil {
-	if cfg.Logging.File != "" {
+		log.Fatalf("Failed to prepare log directory: %v", err)
 		logDir := filepath.Dir(cfg.Logging.File)
 		log.Printf("Creating log directory: %s", logDir)
 		if err := os.MkdirAll(logDir, 0755); err != nil {
 			log.Fatalf("Failed to create log directory: %v", err)
 		}
 	}
-	// Setup logging
+	logger := setupLogger(cfg.Logging)
 	logger := logging.NewLoggerFromConfig(cfg.Logging)
 	ctx := logging.EnsureTrace(context.Background())
 	logger = logger.Component(ctx, "api-server")
-	// Setup experiment manager
+	expManager, err := initExperimentManager(cfg.BasePath, logger)
-	basePath := cfg.BasePath
+	if err != nil {
 		logger.Fatal("failed to initialize experiment manager", "error", err)
 	}
 	taskQueue, queueCleanup := initTaskQueue(cfg, logger)
 	if queueCleanup != nil {
 		defer queueCleanup()
 	}
 	db, dbCleanup := initDatabase(cfg, logger)
 	if dbCleanup != nil {
 		defer dbCleanup()
 	}
 	authCfg := buildAuthConfig(cfg.Auth, logger)
 	sec := newSecurityMiddleware(cfg)
 	mux := buildHTTPMux(cfg, logger, expManager, taskQueue, authCfg, db)
 	finalHandler := wrapWithMiddleware(cfg, sec, mux)
 	server := newHTTPServer(cfg, finalHandler)
 	startServer(server, cfg, logger)
 	waitForShutdown(server, logger)
 	_ = apiKey // Reserved for future authentication enhancements
 }
 func loadServerConfig(path string) (*Config, error) {
 	resolvedConfig, err := config.ResolveConfigPath(path)
 	if err != nil {
 		return nil, err
 	}
 	cfg, err := LoadConfig(resolvedConfig)
 	if err != nil {
 		return nil, err
 	}
 	cfg.Resources.ApplyDefaults()
 	return cfg, nil
 }
 func ensureLogDirectory(cfg logging.Config) error {
 	if cfg.File == "" {
 		return nil
 	}
 	logDir := filepath.Dir(cfg.File)
 	log.Printf("Creating log directory: %s", logDir)
 	return os.MkdirAll(logDir, 0750)
 }
 func setupLogger(cfg logging.Config) *logging.Logger {
 	logger := logging.NewLoggerFromConfig(cfg)
 	ctx := logging.EnsureTrace(context.Background())
 	return logger.Component(ctx, "api-server")
 }
 func initExperimentManager(basePath string, logger *logging.Logger) (*experiment.Manager, error) {
 	if basePath == "" {
 		basePath = "/tmp/ml-experiments"
 	}
 	expManager := experiment.NewManager(basePath)
 	log.Printf("Initializing experiment manager with base_path: %s", basePath)
 	if err := expManager.Initialize(); err != nil {
-		logger.Fatal("failed to initialize experiment manager", "error", err)
+		return nil, err
 	}
 	logger.Info("experiment manager initialized", "base_path", basePath)
 	return expManager, nil
 }
-	// Setup auth
+func buildAuthConfig(cfg auth.Config, logger *logging.Logger) *auth.Config {
-	var authCfg *auth.AuthConfig
+	if !cfg.Enabled {
-	if cfg.Auth.Enabled {
+		return nil
 		authCfg = &cfg.Auth
 		logger.Info("authentication enabled")
 	}
-	// Setup HTTP server with security middleware
+	logger.Info("authentication enabled")
-	mux := http.NewServeMux()
+	return &cfg
 }
-	// Convert API keys from map to slice for security middleware
+func newSecurityMiddleware(cfg *Config) *middleware.SecurityMiddleware {
-	apiKeys := make([]string, 0, len(cfg.Auth.APIKeys))
+	apiKeys := collectAPIKeys(cfg.Auth.APIKeys)
-	for username := range cfg.Auth.APIKeys {
+	rlOpts := buildRateLimitOptions(cfg.Security.RateLimit)
-		// For now, use username as the key (in production, this should be the actual API key)
+	return middleware.NewSecurityMiddleware(apiKeys, os.Getenv("JWT_SECRET"), rlOpts)
 }
 func collectAPIKeys(keys map[auth.Username]auth.APIKeyEntry) []string {
 	apiKeys := make([]string, 0, len(keys))
 	for username := range keys {
 		apiKeys = append(apiKeys, string(username))
 	}
 	return apiKeys
 }
-	// Create security middleware
+func buildRateLimitOptions(cfg RateLimitConfig) *middleware.RateLimitOptions {
-	sec := middleware.NewSecurityMiddleware(apiKeys, os.Getenv("JWT_SECRET"))
+	if !cfg.Enabled || cfg.RequestsPerMinute <= 0 {
 		return nil
 	}
-	// Setup TaskQueue
+	return &middleware.RateLimitOptions{
 		RequestsPerMinute: cfg.RequestsPerMinute,
 		BurstSize:         cfg.BurstSize,
 	}
 }
 func initTaskQueue(cfg *Config, logger *logging.Logger) (*queue.TaskQueue, func()) {
 	queueCfg := queue.Config{
 		RedisAddr:     cfg.Redis.Addr,
 		RedisPassword: cfg.Redis.Password,
@ -166,7 +236,6 @@ func main() {
 	if queueCfg.RedisAddr == "" {
 		queueCfg.RedisAddr = config.DefaultRedisAddr
 	}
 	// Support URL format for Redis
 	if cfg.Redis.URL != "" {
 		queueCfg.RedisAddr = cfg.Redis.URL
 	}
@ -174,160 +243,174 @@ func main() {
 	taskQueue, err := queue.NewTaskQueue(queueCfg)
 	if err != nil {
 		logger.Error("failed to initialize task queue", "error", err)
-		// We continue without queue, but queue operations will fail
+		return nil, nil
 	} else {
 		logger.Info("task queue initialized", "redis_addr", queueCfg.RedisAddr)
 		defer func() {
 			logger.Info("stopping task queue...")
 			if err := taskQueue.Close(); err != nil {
 				logger.Error("failed to stop task queue", "error", err)
 			} else {
 				logger.Info("task queue stopped")
 			}
 		}()
 	}
-	// Setup database if configured
+	logger.Info("task queue initialized", "redis_addr", queueCfg.RedisAddr)
-	var db *storage.DB
+	cleanup := func() {
-	if cfg.Database.Type != "" {
+		logger.Info("stopping task queue...")
-		dbConfig := storage.DBConfig{
+		if err := taskQueue.Close(); err != nil {
-			Type:       cfg.Database.Type,
+			logger.Error("failed to stop task queue", "error", err)
 			Connection: cfg.Database.Connection,
 			Host:       cfg.Database.Host,
 			Port:       cfg.Database.Port,
 			Username:   cfg.Database.Username,
 			Password:   cfg.Database.Password,
 			Database:   cfg.Database.Database,
 		}
 		db, err = storage.NewDB(dbConfig)
 		if err != nil {
 			logger.Error("failed to initialize database", "type", cfg.Database.Type, "error", err)
 		} else {
-			// Load appropriate database schema
+			logger.Info("task queue stopped")
 			var schemaPath string
 			if cfg.Database.Type == "sqlite" {
 				schemaPath = "internal/storage/schema.sql"
 			} else if cfg.Database.Type == "postgres" || cfg.Database.Type == "postgresql" {
 				schemaPath = "internal/storage/schema_postgres.sql"
 			} else {
 				logger.Error("unsupported database type", "type", cfg.Database.Type)
 				db.Close()
 				db = nil
 			}
 			if db != nil && schemaPath != "" {
 				schema, err := os.ReadFile(schemaPath)
 				if err != nil {
 					logger.Error("failed to read database schema file", "path", schemaPath, "error", err)
 					db.Close()
 					db = nil
 				} else {
 					if err := db.Initialize(string(schema)); err != nil {
 						logger.Error("failed to initialize database schema", "error", err)
 						db.Close()
 						db = nil
 					} else {
 						logger.Info("database initialized", "type", cfg.Database.Type, "connection", cfg.Database.Connection)
 						defer func() {
 							logger.Info("closing database connection...")
 							if err := db.Close(); err != nil {
 								logger.Error("failed to close database", "error", err)
 							} else {
 								logger.Info("database connection closed")
 							}
 						}()
 					}
 				}
 			}
 		}
 	}
 	return taskQueue, cleanup
 }
-	// Setup WebSocket handler with authentication
+func initDatabase(cfg *Config, logger *logging.Logger) (*storage.DB, func()) {
 	if cfg.Database.Type == "" {
 		return nil, nil
 	}
 	dbConfig := storage.DBConfig{
 		Type:       cfg.Database.Type,
 		Connection: cfg.Database.Connection,
 		Host:       cfg.Database.Host,
 		Port:       cfg.Database.Port,
 		Username:   cfg.Database.Username,
 		Password:   cfg.Database.Password,
 		Database:   cfg.Database.Database,
 	}
 	db, err := storage.NewDB(dbConfig)
 	if err != nil {
 		logger.Error("failed to initialize database", "type", cfg.Database.Type, "error", err)
 		return nil, nil
 	}
 	schemaPath := schemaPathForDB(cfg.Database.Type)
 	if schemaPath == "" {
 		logger.Error("unsupported database type", "type", cfg.Database.Type)
 		_ = db.Close()
 		return nil, nil
 	}
 	schema, err := fileutil.SecureFileRead(schemaPath)
 	if err != nil {
 		logger.Error("failed to read database schema file", "path", schemaPath, "error", err)
 		_ = db.Close()
 		return nil, nil
 	}
 	if err := db.Initialize(string(schema)); err != nil {
 		logger.Error("failed to initialize database schema", "error", err)
 		_ = db.Close()
 		return nil, nil
 	}
 	logger.Info("database initialized", "type", cfg.Database.Type, "connection", cfg.Database.Connection)
 	cleanup := func() {
 		logger.Info("closing database connection...")
 		if err := db.Close(); err != nil {
 			logger.Error("failed to close database", "error", err)
 		} else {
 			logger.Info("database connection closed")
 		}
 	}
 	return db, cleanup
 }
 func schemaPathForDB(dbType string) string {
 	switch dbType {
 	case "sqlite":
 		return "internal/storage/schema_sqlite.sql"
 	case "postgres", "postgresql":
 		return "internal/storage/schema_postgres.sql"
 	default:
 		return ""
 	}
 }
 func buildHTTPMux(
 	cfg *Config,
 	logger *logging.Logger,
 	expManager *experiment.Manager,
 	taskQueue *queue.TaskQueue,
 	authCfg *auth.Config,
 	db *storage.DB,
 ) *http.ServeMux {
 	mux := http.NewServeMux()
 	wsHandler := api.NewWSHandler(authCfg, logger, expManager, taskQueue)
 	// WebSocket endpoint - no middleware to avoid hijacking issues
 	mux.Handle("/ws", wsHandler)
-	mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
+	mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
 		w.WriteHeader(http.StatusOK)
-		fmt.Fprintf(w, "OK\n")
+		_, _ = fmt.Fprintf(w, "OK\n")
 	})
-	// Database status endpoint
+	mux.HandleFunc("/db-status", func(w http.ResponseWriter, _ *http.Request) {
 	mux.HandleFunc("/db-status", func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "application/json")
-		if db != nil {
+		if db == nil {
 			// Test database connection with a simple query
 			var result struct {
 				Status  string `json:"status"`
 				Type    string `json:"type"`
 				Path    string `json:"path"`
 				Message string `json:"message"`
 			}
 			result.Status = "connected"
 			result.Type = "sqlite"
 			result.Path = cfg.Database.Connection
 			result.Message = "SQLite database is operational"
 			// Test a simple query to verify connectivity
 			if err := db.RecordSystemMetric("db_test", "ok"); err != nil {
 				result.Status = "error"
 				result.Message = fmt.Sprintf("Database query failed: %v", err)
 			}
 			jsonBytes, _ := json.Marshal(result)
 			w.Write(jsonBytes)
 		} else {
 			w.WriteHeader(http.StatusServiceUnavailable)
-			fmt.Fprintf(w, `{"status":"disconnected","message":"Database not configured or failed to initialize"}`)
+			_, _ = fmt.Fprintf(w, `{"status":"disconnected","message":"Database not configured or failed to initialize"}`)
 			return
 		}
 		var result struct {
 			Status  string `json:"status"`
 			Type    string `json:"type"`
 			Path    string `json:"path"`
 			Message string `json:"message"`
 		}
 		result.Status = "connected"
 		result.Type = cfg.Database.Type
 		result.Path = cfg.Database.Connection
 		result.Message = fmt.Sprintf("%s database is operational", cfg.Database.Type)
 		if err := db.RecordSystemMetric("db_test", "ok"); err != nil {
 			result.Status = "error"
 			result.Message = fmt.Sprintf("Database query failed: %v", err)
 		}
 		jsonBytes, _ := json.Marshal(result)
 		_, _ = w.Write(jsonBytes)
 	})
-	// Apply security middleware to all routes except WebSocket
+	return mux
-	// Create separate handlers for WebSocket vs other routes
+}
 	var finalHandler http.Handler = mux
-	// Wrap non-websocket routes with security middleware
+func wrapWithMiddleware(cfg *Config, sec *middleware.SecurityMiddleware, mux *http.ServeMux) http.Handler {
-	finalHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		if r.URL.Path == "/ws" {
 			mux.ServeHTTP(w, r)
-		} else {
+			return
 			// Apply middleware chain for non-WebSocket routes
 			handler := sec.RateLimit(mux)
 			handler = middleware.SecurityHeaders(handler)
 			handler = middleware.CORS(handler)
 			handler = middleware.RequestTimeout(30 * time.Second)(handler)
 			// Apply audit logger and IP whitelist only to non-WebSocket routes
 			handler = middleware.AuditLogger(handler)
 			if len(cfg.Security.IPWhitelist) > 0 {
 				handler = sec.IPWhitelist(cfg.Security.IPWhitelist)(handler)
 			}
 			handler.ServeHTTP(w, r)
 		}
 		handler := sec.RateLimit(mux)
 		handler = middleware.SecurityHeaders(handler)
 		handler = middleware.CORS(handler)
 		handler = middleware.RequestTimeout(30 * time.Second)(handler)
 		handler = middleware.AuditLogger(handler)
 		if len(cfg.Security.IPWhitelist) > 0 {
 			handler = sec.IPWhitelist(cfg.Security.IPWhitelist)(handler)
 		}
 		handler.ServeHTTP(w, r)
 	})
 }
-	var handler http.Handler = finalHandler
+func newHTTPServer(cfg *Config, handler http.Handler) *http.Server {
-
+	return &http.Server{
 	server := &http.Server{
 		Addr:         cfg.Server.Address,
 		Handler:      handler,
-		ReadTimeout:  15 * time.Second,
+		ReadTimeout:  30 * time.Second,
-		WriteTimeout: 15 * time.Second,
+		WriteTimeout: 30 * time.Second,
-		IdleTimeout:  60 * time.Second,
+		IdleTimeout:  120 * time.Second,
 	}
 }
 func startServer(server *http.Server, cfg *Config, logger *logging.Logger) {
 	if !cfg.Server.TLS.Enabled {
 		logger.Warn("TLS disabled for API server; do not use this configuration in production", "address", cfg.Server.Address)
 	}
 	// Start server in goroutine
 	go func() {
 		// Setup TLS if configured
 		if cfg.Server.TLS.Enabled {
 			logger.Info("starting HTTPS server", "address", cfg.Server.Address)
-			if err := server.ListenAndServeTLS(cfg.Server.TLS.CertFile, cfg.Server.TLS.KeyFile); err != nil && err != http.ErrServerClosed {
+			if err := server.ListenAndServeTLS(
 				cfg.Server.TLS.CertFile,
 				cfg.Server.TLS.KeyFile,
 			); err != nil && err != http.ErrServerClosed {
 				logger.Error("HTTPS server failed", "error", err)
 			}
 		} else {
@ -338,8 +421,9 @@ func main() {
 		}
 		os.Exit(1)
 	}()
 }
-	// Setup graceful shutdown
+func waitForShutdown(server *http.Server, logger *logging.Logger) {
 	sigChan := make(chan os.Signal, 1)
 	signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
@ -357,7 +441,4 @@ func main() {
 	}
 	logger.Info("api server stopped")
 	_ = expManager // Use expManager to avoid unused warning
 	_ = apiKey     // Will be used for auth later
 }
--- a/cmd/configlint/main.go
+++ b/cmd/configlint/main.go
@ -1,3 +1,4 @@
 // Package main implements the fetch_ml configuration linter
 package main
 import (
@ -9,6 +10,7 @@ import (
 	"path/filepath"
 	"strings"
 	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"github.com/xeipuuv/gojsonschema"
 	"gopkg.in/yaml.v3"
 )
@ -51,12 +53,12 @@ func main() {
 }
 func loadSchema(schemaPath string) (gojsonschema.JSONLoader, error) {
-	data, err := os.ReadFile(schemaPath)
+	data, err := fileutil.SecureFileRead(schemaPath)
 	if err != nil {
 		return nil, err
 	}
-	var schemaYAML interface{}
+	var schemaYAML any
 	if err := yaml.Unmarshal(data, &schemaYAML); err != nil {
 		return nil, err
 	}
@ -70,7 +72,10 @@ func loadSchema(schemaPath string) (gojsonschema.JSONLoader, error) {
 	if err != nil {
 		return nil, err
 	}
-	defer tmpFile.Close()
+	defer func() {
 		_ = tmpFile.Close()
 		_ = os.Remove(tmpFile.Name())
 	}()
 	if _, err := tmpFile.Write(schemaJSON); err != nil {
 		return nil, err
@ -80,7 +85,7 @@ func loadSchema(schemaPath string) (gojsonschema.JSONLoader, error) {
 }
 func validateConfig(schemaLoader gojsonschema.JSONLoader, configPath string) error {
-	data, err := os.ReadFile(configPath)
+	data, err := fileutil.SecureFileRead(configPath)
 	if err != nil {
 		return err
 	}
--- a/cmd/data_manager/data_manager_config.go
+++ b/cmd/data_manager/data_manager_config.go
@ -3,13 +3,14 @@ package main
 import (
 	"fmt"
 	"os"
 	"github.com/jfraeys/fetch_ml/internal/auth"
 	"github.com/jfraeys/fetch_ml/internal/config"
 	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"gopkg.in/yaml.v3"
 )
 // DataConfig holds the configuration for the data manager
 type DataConfig struct {
 	// ML Server (where training runs)
 	MLHost    string `yaml:"ml_host"`
@ -31,7 +32,7 @@ type DataConfig struct {
 	RedisDB       int    `yaml:"redis_db"`
 	// Authentication
-	Auth auth.AuthConfig `yaml:"auth"`
+	Auth auth.Config `yaml:"auth"`
 	// Cleanup settings
 	MaxAgeHours     int `yaml:"max_age_hours"`        // Delete data older than X hours
@ -45,8 +46,9 @@ type DataConfig struct {
 	GPUAccess          bool   `yaml:"gpu_access"`
 }
 // LoadDataConfig loads data manager configuration from a YAML file.
 func LoadDataConfig(path string) (*DataConfig, error) {
-	data, err := os.ReadFile(path)
+	data, err := fileutil.SecureFileRead(path)
 	if err != nil {
 		return nil, err
 	}
@ -96,7 +98,7 @@ func LoadDataConfig(path string) (*DataConfig, error) {
 	return &cfg, nil
 }
-// Validate implements utils.Validator interface
+// Validate implements utils.Validator interface.
 func (c *DataConfig) Validate() error {
 	if c.MLPort != 0 {
 		if err := config.ValidatePort(c.MLPort); err != nil {
--- a/cmd/data_manager/data_sync.go
+++ b/cmd/data_manager/data_sync.go
@ -16,16 +16,17 @@ import (
 	"github.com/jfraeys/fetch_ml/internal/auth"
 	"github.com/jfraeys/fetch_ml/internal/container"
-	"github.com/jfraeys/fetch_ml/internal/errors"
+	"github.com/jfraeys/fetch_ml/internal/errtypes"
 	"github.com/jfraeys/fetch_ml/internal/logging"
 	"github.com/jfraeys/fetch_ml/internal/network"
 	"github.com/jfraeys/fetch_ml/internal/queue"
 	"github.com/jfraeys/fetch_ml/internal/telemetry"
 )
-// SSHClient alias for convenience
+// SSHClient alias for convenience.
 type SSHClient = network.SSHClient
 // DataManager manages data synchronization between NAS and ML server.
 type DataManager struct {
 	config    *DataConfig
 	mlServer  *SSHClient
@ -36,6 +37,7 @@ type DataManager struct {
 	logger    *logging.Logger
 }
 // DataFetchRequest represents a request to fetch datasets.
 type DataFetchRequest struct {
 	JobName     string    `json:"job_name"`
 	Datasets    []string  `json:"datasets"` // Dataset names to fetch
@ -43,6 +45,7 @@ type DataFetchRequest struct {
 	RequestedAt time.Time `json:"requested_at"`
 }
 // DatasetInfo contains information about a dataset.
 type DatasetInfo struct {
 	Name       string    `json:"name"`
 	SizeBytes  int64     `json:"size_bytes"`
@ -50,7 +53,8 @@ type DatasetInfo struct {
 	LastAccess time.Time `json:"last_access"`
 }
-func NewDataManager(cfg *DataConfig, apiKey string) (*DataManager, error) {
+// NewDataManager creates a new DataManager instance.
 func NewDataManager(cfg *DataConfig, _ string) (*DataManager, error) {
 	mlServer, err := network.NewSSHClient(cfg.MLHost, cfg.MLUser, cfg.MLSSHKey, cfg.MLPort, "")
 	if err != nil {
 		return nil, fmt.Errorf("ML server connection failed: %w", err)
@ -79,7 +83,11 @@ func NewDataManager(cfg *DataConfig, apiKey string) (*DataManager, error) {
 	if cfg.MLDataDir != "" {
 		if _, err := mlServer.Exec(fmt.Sprintf("mkdir -p %s", cfg.MLDataDir)); err != nil {
 			logger := logging.NewLogger(slog.LevelInfo, false)
-			logger.Job(context.Background(), "data_manager", "").Error("Failed to create ML data directory", "dir", cfg.MLDataDir, "error", err)
+			logger.Job(context.Background(), "data_manager", "").Error(
 				"Failed to create ML data directory",
 				"dir", cfg.MLDataDir,
 				"error", err,
 			)
 		}
 	}
@ -123,6 +131,7 @@ func NewDataManager(cfg *DataConfig, apiKey string) (*DataManager, error) {
 	}, nil
 }
 // FetchDataset fetches a dataset from NAS to ML server.
 func (dm *DataManager) FetchDataset(jobName, datasetName string) error {
 	ctx, cancel := context.WithTimeout(dm.ctx, 30*time.Minute)
 	defer cancel()
@ -134,7 +143,7 @@ func (dm *DataManager) FetchDataset(jobName, datasetName string) error {
 func (dm *DataManager) fetchDatasetInternal(ctx context.Context, jobName, datasetName string) error {
 	if err := container.ValidateJobName(datasetName); err != nil {
-		return &errors.DataFetchError{
+		return &errtypes.DataFetchError{
 			Dataset: datasetName,
 			JobName: jobName,
 			Err:     fmt.Errorf("invalid dataset name: %w", err),
@ -146,7 +155,7 @@ func (dm *DataManager) fetchDatasetInternal(ctx context.Context, jobName, datase
 	// Validate dataset size and run cleanup if needed
 	if err := dm.ValidateDatasetWithCleanup(datasetName); err != nil {
-		return &errors.DataFetchError{
+		return &errtypes.DataFetchError{
 			Dataset: datasetName,
 			JobName: jobName,
 			Err:     fmt.Errorf("dataset size validation failed: %w", err),
@ -158,7 +167,7 @@ func (dm *DataManager) fetchDatasetInternal(ctx context.Context, jobName, datase
 	// Check if dataset exists on NAS
 	if !dm.nasServer.FileExists(nasPath) {
-		return &errors.DataFetchError{
+		return &errtypes.DataFetchError{
 			Dataset: datasetName,
 			JobName: jobName,
 			Err:     fmt.Errorf("dataset not found on NAS"),
@ -384,6 +393,7 @@ func (dm *DataManager) ListDatasetsOnML() ([]DatasetInfo, error) {
 	return datasets, nil
 }
 // CleanupOldData removes old datasets based on age and size limits.
 func (dm *DataManager) CleanupOldData() error {
 	logger := dm.logger.Job(dm.ctx, "data_manager", "")
 	logger.Info("running data cleanup")
@ -466,7 +476,7 @@ func (dm *DataManager) CleanupOldData() error {
 	return nil
 }
-// GetAvailableDiskSpace returns available disk space in bytes
+// GetAvailableDiskSpace returns available disk space in bytes.
 func (dm *DataManager) GetAvailableDiskSpace() int64 {
 	logger := dm.logger.Job(dm.ctx, "data_manager", "")
@ -489,7 +499,7 @@ func (dm *DataManager) GetAvailableDiskSpace() int64 {
 	return freeKB * 1024 // Convert KB to bytes
 }
-// GetDatasetInfo returns information about a dataset from NAS
+// GetDatasetInfo returns information about a dataset from NAS.
 func (dm *DataManager) GetDatasetInfo(datasetName string) (*DatasetInfo, error) {
 	// Check if dataset exists on NAS
 	nasPath := filepath.Join(dm.config.NASDataDir, datasetName)
@ -533,7 +543,7 @@ func (dm *DataManager) GetDatasetInfo(datasetName string) (*DatasetInfo, error)
 	}, nil
 }
-// ValidateDatasetWithCleanup checks if dataset fits and runs cleanup if needed
+// ValidateDatasetWithCleanup checks if dataset fits and runs cleanup if needed.
 func (dm *DataManager) ValidateDatasetWithCleanup(datasetName string) error {
 	logger := dm.logger.Job(dm.ctx, "data_manager", "")
@ -585,6 +595,7 @@ func (dm *DataManager) ValidateDatasetWithCleanup(datasetName string) error {
 		float64(availableSpace)/(1024*1024*1024))
 }
 // StartCleanupLoop starts the periodic cleanup loop.
 func (dm *DataManager) StartCleanupLoop() {
 	logger := dm.logger.Job(dm.ctx, "data_manager", "")
 	ticker := time.NewTicker(time.Duration(dm.config.CleanupInterval) * time.Minute)
@ -632,7 +643,7 @@ func (dm *DataManager) Close() {
 func main() {
 	// Parse authentication flags
 	authFlags := auth.ParseAuthFlags()
-	if err := auth.ValidateAuthFlags(authFlags); err != nil {
+	if err := auth.ValidateFlags(authFlags); err != nil {
 		log.Fatalf("Authentication flag error: %v", err)
 	}
@ -647,7 +658,8 @@ func main() {
 	// Parse command line args
 	if len(os.Args) < 2 {
 		fmt.Println("Usage:")
-		fmt.Println("  data_manager [--config configs/config-local.yaml] [--api-key <key>] fetch <job-name> <dataset> [dataset...]")
+		fmt.Println("  data_manager [--config configs/config-local.yaml] [--api-key <key>] " +
 			"fetch <job-name> <dataset> [dataset...]")
 		fmt.Println("  data_manager [--config configs/config-local.yaml] [--api-key <key>] list")
 		fmt.Println("  data_manager [--config configs/config-local.yaml] [--api-key <key>] cleanup")
 		fmt.Println("  data_manager [--config configs/config-local.yaml] [--api-key <key>] validate <dataset>")
@ -701,7 +713,8 @@ func main() {
 	switch cmd {
 	case "fetch":
 		if len(os.Args) < 4 {
-			log.Fatal("Usage: data_manager fetch <job-name> <dataset> [dataset...]")
+			log.Printf("Usage: data_manager fetch <job-name> <dataset> [dataset...]")
 			return
 		}
 		jobName := os.Args[2]
 		datasets := os.Args[3:]
@ -717,7 +730,8 @@ func main() {
 	case "list":
 		datasets, err := dm.ListDatasetsOnML()
 		if err != nil {
-			log.Fatalf("Failed to list datasets: %v", err)
+			log.Printf("Failed to list datasets: %v", err)
 			return
 		}
 		fmt.Println("Datasets on ML server:")
@ -736,19 +750,22 @@ func main() {
 	case "validate":
 		if len(os.Args) < 3 {
-			log.Fatal("Usage: data_manager validate <dataset>")
+			log.Printf("Usage: data_manager validate <dataset>")
 			return
 		}
 		dataset := os.Args[2]
 		fmt.Printf("Validating dataset: %s\n", dataset)
 		if err := dm.ValidateDatasetWithCleanup(dataset); err != nil {
-			log.Fatalf("Validation failed: %v", err)
+			log.Printf("Validation failed: %v", err)
 			return
 		}
 		fmt.Printf("✅ Dataset %s can be downloaded\n", dataset)
 	case "cleanup":
 		if err := dm.CleanupOldData(); err != nil {
-			log.Fatalf("Cleanup failed: %v", err)
+			log.Printf("Cleanup failed: %v", err)
 			return
 		}
 	case "daemon":
@ -770,6 +787,6 @@ func main() {
 		logger.Info("data manager shut down gracefully")
 	default:
-		log.Fatalf("Unknown command: %s", cmd)
+		log.Printf("Unknown command: %s", cmd)
 	}
 }
--- a/cmd/db-utils/init_multi_user.go
+++ b/cmd/db-utils/init_multi_user.go
@ -0,0 +1,92 @@
 package main
 import (
 	"database/sql"
 	"fmt"
 	"log"
 	"os"
 	_ "github.com/mattn/go-sqlite3"
 )
 func main() {
 	if len(os.Args) < 2 {
 		fmt.Println("Usage: go run init_db.go <database_path>")
 		fmt.Println("Example: go run init_db.go /app/data/experiments/fetch_ml.db")
 		os.Exit(1)
 	}
 	dbPath := os.Args[1]
 	// Open database
 	db, err := sql.Open("sqlite3", dbPath)
 	if err != nil {
 		log.Fatalf("Failed to open database: %v", err)
 	}
 	defer db.Close()
 	// Create api_keys table if not exists
 	createTable := `
 	CREATE TABLE IF NOT EXISTS api_keys (
 		id INTEGER PRIMARY KEY AUTOINCREMENT,
 		user_id TEXT NOT NULL UNIQUE,
 		key_hash TEXT NOT NULL UNIQUE,
 		admin BOOLEAN NOT NULL DEFAULT FALSE,
 		roles TEXT NOT NULL DEFAULT '[]',
 		permissions TEXT NOT NULL DEFAULT '{}',
 		created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
 		expires_at DATETIME,
 		revoked_at DATETIME,
 		CHECK (json_valid(roles)),
 		CHECK (json_valid(permissions))
 	);`
 	if _, err := db.Exec(createTable); err != nil {
 		log.Fatalf("Failed to create table: %v", err)
 	}
 	// Insert users
 	users := []struct {
 		userID      string
 		keyHash     string
 		admin       bool
 		roles       string
 		permissions string
 	}{
 		{
 			"admin_user",
 			"5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8",
 			true,
 			`["user", "admin"]`,
 			`{"read": true, "write": true, "delete": true}`,
 		},
 		{
 			"researcher1",
 			"ef92b778ba7a6c8f2150019a5678047b6a9a2b95cef8189518f9b35c54d2e3ae",
 			false,
 			`["user", "researcher"]`,
 			`{"read": true, "write": true, "delete": false}`,
 		},
 		{
 			"analyst1",
 			"a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3",
 			false,
 			`["user", "analyst"]`,
 			`{"read": true, "write": false, "delete": false}`,
 		},
 	}
 	for _, user := range users {
 		insert := `
 		INSERT OR REPLACE INTO api_keys (user_id, key_hash, admin, roles, permissions) 
 		VALUES (?, ?, ?, ?, ?)`
 		if _, err := db.Exec(insert, user.userID, user.keyHash, user.admin, user.roles, user.permissions); err != nil {
 			log.Printf("Failed to insert user %s: %v", user.userID, err)
 		} else {
 			fmt.Printf("Successfully inserted user: %s\n", user.userID)
 		}
 	}
 	fmt.Println("Database initialization complete!")
 }
--- a/cmd/db-utils/init_multi_user.sql
+++ b/cmd/db-utils/init_multi_user.sql
@ -0,0 +1,27 @@
 -- Initialize multi-user database with API keys
 -- First ensure the api_keys table exists
 CREATE TABLE IF NOT EXISTS api_keys (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    user_id TEXT NOT NULL UNIQUE,
    key_hash TEXT NOT NULL UNIQUE,
    admin BOOLEAN NOT NULL DEFAULT FALSE,
    roles TEXT NOT NULL DEFAULT '[]',
    permissions TEXT NOT NULL DEFAULT '{}',
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    expires_at DATETIME,
    revoked_at DATETIME,
    CHECK (json_valid(roles)),
    CHECK (json_valid(permissions))
 );
 -- Insert admin user with full permissions
 INSERT OR REPLACE INTO api_keys (user_id, key_hash, admin, roles, permissions) 
 VALUES ('admin_user', '5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8', TRUE, '["user", "admin"]', '{"read": true, "write": true, "delete": true}');
 -- Insert researcher with read/write permissions
 INSERT OR REPLACE INTO api_keys (user_id, key_hash, admin, roles, permissions) 
 VALUES ('researcher1', 'ef92b778ba7a6c8f2150019a5678047b6a9a2b95cef8189518f9b35c54d2e3ae', FALSE, '["user", "researcher"]', '{"read": true, "write": true, "delete": false}');
 -- Insert analyst with read-only permissions
 INSERT OR REPLACE INTO api_keys (user_id, key_hash, admin, roles, permissions) 
 VALUES ('analyst1', 'a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3', FALSE, '["user", "analyst"]', '{"read": true, "write": false, "delete": false}');
--- a/cmd/tui/internal/config/cli_config.go
+++ b/cmd/tui/internal/config/cli_config.go
@ -1,3 +1,4 @@
 // Package config provides TUI configuration management
 package config
 import (
@ -70,15 +71,14 @@ func LoadCLIConfig(configPath string) (*CLIConfig, string, error) {
 		log.Printf("Warning: %v", err)
 	}
 	//nolint:gosec // G304: Config path is user-controlled but trusted
 	data, err := os.ReadFile(configPath)
 	if err != nil {
 		return nil, configPath, fmt.Errorf("failed to read CLI config: %w", err)
 	}
 	config := &CLIConfig{}
-	if err := parseTOML(data, config); err != nil {
+	parseTOML(data, config)
 		return nil, configPath, fmt.Errorf("failed to parse CLI config: %w", err)
 	}
 	if err := config.Validate(); err != nil {
 		return nil, configPath, err
@ -126,7 +126,7 @@ func LoadCLIConfig(configPath string) (*CLIConfig, string, error) {
 }
 // parseTOML is a simple TOML parser for the CLI config format
-func parseTOML(data []byte, config *CLIConfig) error {
+func parseTOML(data []byte, config *CLIConfig) {
 	lines := strings.Split(string(data), "\n")
 	for _, line := range lines {
@ -163,8 +163,6 @@ func parseTOML(data []byte, config *CLIConfig) error {
 			config.APIKey = value
 		}
 	}
 	return nil
 }
 // ToTUIConfig converts CLI config to TUI config structure
@ -188,7 +186,7 @@ func (c *CLIConfig) ToTUIConfig() *Config {
 	}
 	// Set up auth config with CLI API key
-	tuiConfig.Auth = auth.AuthConfig{
+	tuiConfig.Auth = auth.Config{
 		Enabled: true,
 		APIKeys: map[auth.Username]auth.APIKeyEntry{
 			"cli_user": {
@ -262,7 +260,7 @@ func (c *CLIConfig) AuthenticateWithServer() error {
 	}
 	// Create temporary auth config for validation
-	authConfig := &auth.AuthConfig{
+	authConfig := &auth.Config{
 		Enabled: true,
 		APIKeys: map[auth.Username]auth.APIKeyEntry{
 			"temp": {
@ -356,6 +354,7 @@ func migrateFromYAML(yamlPath, tomlPath string) (string, error) {
 	}
 	// Read YAML config
 	//nolint:gosec // G304: Config path is user-controlled but trusted
 	data, err := os.ReadFile(yamlPath)
 	if err != nil {
 		return "", fmt.Errorf("failed to read YAML config: %w", err)
@ -421,7 +420,7 @@ api_key = "%s"
 	)
 	// Create directory if it doesn't exist
-	if err := os.MkdirAll(filepath.Dir(tomlPath), 0755); err != nil {
+	if err := os.MkdirAll(filepath.Dir(tomlPath), 0750); err != nil {
 		return "", fmt.Errorf("failed to create config directory: %w", err)
 	}
@ -433,8 +432,8 @@ api_key = "%s"
 	return tomlPath, nil
 }
-// ConfigExists checks if a CLI configuration file exists
+// Exists checks if a CLI configuration file exists
-func ConfigExists(configPath string) bool {
+func Exists(configPath string) bool {
 	if configPath == "" {
 		home, err := os.UserHomeDir()
 		if err != nil {
@ -450,7 +449,7 @@ func ConfigExists(configPath string) bool {
 // GenerateDefaultConfig creates a default TOML configuration file
 func GenerateDefaultConfig(configPath string) error {
 	// Create directory if it doesn't exist
-	if err := os.MkdirAll(filepath.Dir(configPath), 0755); err != nil {
+	if err := os.MkdirAll(filepath.Dir(configPath), 0750); err != nil {
 		return fmt.Errorf("failed to create config directory: %w", err)
 	}
--- a/cmd/tui/internal/config/cli_config_test.go
+++ b/cmd/tui/internal/config/cli_config_test.go
@ -1,194 +0,0 @@
 package config
 import (
 	"testing"
 )
 func TestCLIConfig_CheckPermission(t *testing.T) {
 	tests := []struct {
 		name       string
 		config     *CLIConfig
 		permission string
 		want       bool
 	}{
 		{
 			name: "Admin has all permissions",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:  "admin",
 					Admin: true,
 				},
 			},
 			permission: "any:permission",
 			want:       true,
 		},
 		{
 			name: "User with explicit permission",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:        "user",
 					Admin:       false,
 					Permissions: map[string]bool{"jobs:create": true},
 				},
 			},
 			permission: "jobs:create",
 			want:       true,
 		},
 		{
 			name: "User without permission",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:        "user",
 					Admin:       false,
 					Permissions: map[string]bool{"jobs:read": true},
 				},
 			},
 			permission: "jobs:create",
 			want:       false,
 		},
 		{
 			name: "No current user",
 			config: &CLIConfig{
 				CurrentUser: nil,
 			},
 			permission: "jobs:create",
 			want:       false,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			got := tt.config.CheckPermission(tt.permission)
 			if got != tt.want {
 				t.Errorf("CheckPermission() = %v, want %v", got, tt.want)
 			}
 		})
 	}
 }
 func TestCLIConfig_CanViewJob(t *testing.T) {
 	tests := []struct {
 		name      string
 		config    *CLIConfig
 		jobUserID string
 		want      bool
 	}{
 		{
 			name: "Admin can view any job",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:  "admin",
 					Admin: true,
 				},
 			},
 			jobUserID: "other_user",
 			want:      true,
 		},
 		{
 			name: "User can view own job",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:  "user1",
 					Admin: false,
 				},
 			},
 			jobUserID: "user1",
 			want:      true,
 		},
 		{
 			name: "User cannot view other's job",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:  "user1",
 					Admin: false,
 				},
 			},
 			jobUserID: "user2",
 			want:      false,
 		},
 		{
 			name: "No current user cannot view",
 			config: &CLIConfig{
 				CurrentUser: nil,
 			},
 			jobUserID: "user1",
 			want:      false,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			got := tt.config.CanViewJob(tt.jobUserID)
 			if got != tt.want {
 				t.Errorf("CanViewJob() = %v, want %v", got, tt.want)
 			}
 		})
 	}
 }
 func TestCLIConfig_CanModifyJob(t *testing.T) {
 	tests := []struct {
 		name      string
 		config    *CLIConfig
 		jobUserID string
 		want      bool
 	}{
 		{
 			name: "Admin can modify any job",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:        "admin",
 					Admin:       true,
 					Permissions: map[string]bool{"jobs:update": true},
 				},
 			},
 			jobUserID: "other_user",
 			want:      true,
 		},
 		{
 			name: "User with permission can modify own job",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:        "user1",
 					Admin:       false,
 					Permissions: map[string]bool{"jobs:update": true},
 				},
 			},
 			jobUserID: "user1",
 			want:      true,
 		},
 		{
 			name: "User without permission cannot modify",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:        "user1",
 					Admin:       false,
 					Permissions: map[string]bool{"jobs:read": true},
 				},
 			},
 			jobUserID: "user1",
 			want:      false,
 		},
 		{
 			name: "User cannot modify other's job",
 			config: &CLIConfig{
 				CurrentUser: &UserContext{
 					Name:        "user1",
 					Admin:       false,
 					Permissions: map[string]bool{"jobs:update": true},
 				},
 			},
 			jobUserID: "user2",
 			want:      false,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			got := tt.config.CanModifyJob(tt.jobUserID)
 			if got != tt.want {
 				t.Errorf("CanModifyJob() = %v, want %v", got, tt.want)
 			}
 		})
 	}
 }
--- a/cmd/tui/internal/config/config.go
+++ b/cmd/tui/internal/config/config.go
@ -25,7 +25,7 @@ type Config struct {
 	KnownHosts    string `toml:"known_hosts"`
 	// Authentication
-	Auth auth.AuthConfig `toml:"auth"`
+	Auth auth.Config `toml:"auth"`
 	// Podman settings
 	PodmanImage        string `toml:"podman_image"`
@ -34,7 +34,9 @@ type Config struct {
 	GPUAccess          bool   `toml:"gpu_access"`
 }
 // LoadConfig loads configuration from a TOML file
 func LoadConfig(path string) (*Config, error) {
 	//nolint:gosec // G304: Config path is user-controlled but trusted
 	data, err := os.ReadFile(path)
 	if err != nil {
 		return nil, err
@ -132,10 +134,17 @@ func (c *Config) Validate() error {
 	return nil
 }
-func (c *Config) PendingPath() string  { return filepath.Join(c.BasePath, "pending") }
+// PendingPath returns the path for pending experiments
-func (c *Config) RunningPath() string  { return filepath.Join(c.BasePath, "running") }
+func (c *Config) PendingPath() string { return filepath.Join(c.BasePath, "pending") }
 // RunningPath returns the path for running experiments
 func (c *Config) RunningPath() string { return filepath.Join(c.BasePath, "running") }
 // FinishedPath returns the path for finished experiments
 func (c *Config) FinishedPath() string { return filepath.Join(c.BasePath, "finished") }
-func (c *Config) FailedPath() string   { return filepath.Join(c.BasePath, "failed") }
+
 // FailedPath returns the path for failed experiments
 func (c *Config) FailedPath() string { return filepath.Join(c.BasePath, "failed") }
 // parseInt parses a string to integer
 func parseInt(s string) (int, error) {
--- a/cmd/tui/internal/controller/commands.go
+++ b/cmd/tui/internal/controller/commands.go
@ -1,3 +1,4 @@
 // Package controller provides TUI command handlers
 package controller
 import (
@ -10,22 +11,38 @@ import (
 	"github.com/jfraeys/fetch_ml/cmd/tui/internal/model"
 )
-// Message types for async operations
+// JobsLoadedMsg contains loaded jobs from the queue
-type (
+type JobsLoadedMsg []model.Job
-	JobsLoadedMsg      []model.Job
+
-	TasksLoadedMsg     []*model.Task
+// TasksLoadedMsg contains loaded tasks from the queue
-	GpuLoadedMsg       string
+type TasksLoadedMsg []*model.Task
-	ContainerLoadedMsg string
+
-	LogLoadedMsg       string
+// GpuLoadedMsg contains GPU status information
-	QueueLoadedMsg     string
+type GpuLoadedMsg string
-	SettingsContentMsg string
+
-	SettingsUpdateMsg  struct{}
+// ContainerLoadedMsg contains container status information
-	StatusMsg          struct {
+type ContainerLoadedMsg string
-		Text  string
+
-		Level string
+// LogLoadedMsg contains log content
-	}
+type LogLoadedMsg string
-	TickMsg time.Time
+
-)
+// QueueLoadedMsg contains queue status information
 type QueueLoadedMsg string
 // SettingsContentMsg contains settings content
 type SettingsContentMsg string
 // SettingsUpdateMsg indicates settings should be updated
 type SettingsUpdateMsg struct{}
 // StatusMsg contains status text and level
 type StatusMsg struct {
 	Text  string
 	Level string
 }
 // TickMsg represents a timer tick
 type TickMsg time.Time
 // Command factories for loading data
@ -50,7 +67,12 @@ func (c *Controller) loadJobs() tea.Cmd {
 			var jobs []model.Job
 			statusChan := make(chan []model.Job, 4)
-			for _, status := range []model.JobStatus{model.StatusPending, model.StatusRunning, model.StatusFinished, model.StatusFailed} {
+			for _, status := range []model.JobStatus{
 				model.StatusPending,
 				model.StatusRunning,
 				model.StatusFinished,
 				model.StatusFailed,
 			} {
 				go func(s model.JobStatus) {
 					path := c.getPathForStatus(s)
 					names := c.server.ListDir(path)
@ -112,7 +134,8 @@ func (c *Controller) loadGPU() tea.Cmd {
 		resultChan := make(chan gpuResult, 1)
 		go func() {
-			cmd := "nvidia-smi --query-gpu=index,name,utilization.gpu,memory.used,memory.total,temperature.gpu --format=csv,noheader,nounits"
+			cmd := "nvidia-smi --query-gpu=index,name,utilization.gpu," +
 				"memory.used,memory.total,temperature.gpu --format=csv,noheader,nounits"
 			out, err := c.server.Exec(cmd)
 			if err == nil && strings.TrimSpace(out) != "" {
 				var formatted strings.Builder
@ -137,7 +160,10 @@ func (c *Controller) loadGPU() tea.Cmd {
 			out, err = c.server.Exec(cmd)
 			if err != nil {
 				c.logger.Warn("GPU info unavailable", "error", err)
-				resultChan <- gpuResult{content: "⚠️  GPU info unavailable\n\nRun on a system with nvidia-smi or macOS GPU", err: err}
+				resultChan <- gpuResult{
 					content: "GPU info unavailable\n\nRun on a system with nvidia-smi or macOS GPU",
 					err:     err,
 				}
 				return
 			}
@ -232,43 +258,6 @@ func (c *Controller) loadContainer() tea.Cmd {
 	}
 }
 func (c *Controller) loadLog(jobName string) tea.Cmd {
 	return func() tea.Msg {
 		resultChan := make(chan string, 1)
 		go func() {
 			statusChan := make(chan string, 3)
 			for _, status := range []model.JobStatus{model.StatusRunning, model.StatusFinished, model.StatusFailed} {
 				go func(s model.JobStatus) {
 					logPath := filepath.Join(c.getPathForStatus(s), jobName, "output.log")
 					if c.server.RemoteExists(logPath) {
 						content := c.server.TailFile(logPath, 200)
 						statusChan <- content
 					} else {
 						statusChan <- ""
 					}
 				}(status)
 			}
 			for range 3 {
 				result := <-statusChan
 				if result != "" {
 					var formatted strings.Builder
 					formatted.WriteString(fmt.Sprintf("📋 Log: %s\n", jobName))
 					formatted.WriteString(strings.Repeat("═", 60) + "\n\n")
 					formatted.WriteString(result)
 					resultChan <- formatted.String()
 					return
 				}
 			}
 			resultChan <- fmt.Sprintf("⚠️  No log found for %s\n\nJob may not have started yet.", jobName)
 		}()
 		return LogLoadedMsg(<-resultChan)
 	}
 }
 func (c *Controller) queueJob(jobName string, args string) tea.Cmd {
 	return func() tea.Msg {
 		resultChan := make(chan StatusMsg, 1)
--- a/cmd/tui/internal/controller/controller.go
+++ b/cmd/tui/internal/controller/controller.go
@ -21,6 +21,258 @@ type Controller struct {
 	logger    *logging.Logger
 }
 func (c *Controller) handleKeyMsg(msg tea.KeyMsg, m model.State) (model.State, tea.Cmd) {
 	if m.InputMode {
 		return c.handleInputModeKey(msg, m)
 	}
 	if m.ActiveView == model.ViewModeSettings {
 		return c.handleSettingsKeys(msg, m)
 	}
 	if key.Matches(msg, m.Keys.Quit) {
 		return m, tea.Quit
 	}
 	cmds := c.handleGlobalKeys(msg, &m)
 	return c.finalizeUpdate(msg, m, cmds...)
 }
 func (c *Controller) handleInputModeKey(msg tea.KeyMsg, m model.State) (model.State, tea.Cmd) {
 	switch msg.String() {
 	case "enter":
 		args := m.Input.Value()
 		m.Input.SetValue("")
 		m.InputMode = false
 		if job := getSelectedJob(m); job != nil {
 			return m, c.queueJob(job.Name, args)
 		}
 		return m, nil
 	case "esc":
 		m.InputMode = false
 		m.Input.SetValue("")
 		return m, nil
 	default:
 		var cmd tea.Cmd
 		m.Input, cmd = m.Input.Update(msg)
 		return m, cmd
 	}
 }
 func (c *Controller) handleSettingsKeys(msg tea.KeyMsg, m model.State) (model.State, tea.Cmd) {
 	cmds := c.navigateSettings(msg, &m)
 	if m.SettingsIndex == 1 {
 		var inputCmd tea.Cmd
 		m.APIKeyInput, inputCmd = m.APIKeyInput.Update(msg)
 		cmds = append(cmds, inputCmd, c.updateSettingsContent(m))
 	}
 	return m, tea.Batch(cmds...)
 }
 func (c *Controller) navigateSettings(msg tea.KeyMsg, m *model.State) []tea.Cmd {
 	var cmds []tea.Cmd
 	switch msg.String() {
 	case "up", "k":
 		if m.SettingsIndex > 1 {
 			m.SettingsIndex--
 			cmds = append(cmds, c.updateSettingsContent(*m))
 			c.toggleAPIKeyInputFocus(m)
 		}
 	case "down", "j":
 		if m.SettingsIndex < 2 {
 			m.SettingsIndex++
 			cmds = append(cmds, c.updateSettingsContent(*m))
 			c.toggleAPIKeyInputFocus(m)
 		}
 	case "enter":
 		if cmd := c.handleSettingsAction(m); cmd != nil {
 			cmds = append(cmds, cmd)
 		}
 	case "esc":
 		m.ActiveView = model.ViewModeJobs
 		m.APIKeyInput.Blur()
 	}
 	return cmds
 }
 func (c *Controller) toggleAPIKeyInputFocus(m *model.State) {
 	if m.SettingsIndex == 1 {
 		m.APIKeyInput.Focus()
 	} else {
 		m.APIKeyInput.Blur()
 	}
 }
 func (c *Controller) handleGlobalKeys(msg tea.KeyMsg, m *model.State) []tea.Cmd {
 	var cmds []tea.Cmd
 	switch {
 	case key.Matches(msg, m.Keys.Refresh):
 		m.IsLoading = true
 		m.Status = "Refreshing all data..."
 		m.LastRefresh = time.Now()
 		cmds = append(cmds, c.loadAllData())
 	case key.Matches(msg, m.Keys.RefreshGPU):
 		m.Status = "Refreshing GPU status..."
 		cmds = append(cmds, c.loadGPU())
 	case key.Matches(msg, m.Keys.Trigger):
 		if job := getSelectedJob(*m); job != nil {
 			cmds = append(cmds, c.queueJob(job.Name, ""))
 		}
 	case key.Matches(msg, m.Keys.TriggerArgs):
 		if job := getSelectedJob(*m); job != nil {
 			m.InputMode = true
 			m.Input.Focus()
 		}
 	case key.Matches(msg, m.Keys.ViewQueue):
 		m.ActiveView = model.ViewModeQueue
 		cmds = append(cmds, c.showQueue(*m))
 	case key.Matches(msg, m.Keys.ViewContainer):
 		m.ActiveView = model.ViewModeContainer
 		cmds = append(cmds, c.loadContainer())
 	case key.Matches(msg, m.Keys.ViewGPU):
 		m.ActiveView = model.ViewModeGPU
 		cmds = append(cmds, c.loadGPU())
 	case key.Matches(msg, m.Keys.ViewJobs):
 		m.ActiveView = model.ViewModeJobs
 	case key.Matches(msg, m.Keys.ViewSettings):
 		m.ActiveView = model.ViewModeSettings
 		m.SettingsIndex = 1
 		m.APIKeyInput.Focus()
 		cmds = append(cmds, c.updateSettingsContent(*m))
 	case key.Matches(msg, m.Keys.ViewExperiments):
 		m.ActiveView = model.ViewModeExperiments
 		cmds = append(cmds, c.loadExperiments())
 	case key.Matches(msg, m.Keys.Cancel):
 		if job := getSelectedJob(*m); job != nil && job.TaskID != "" {
 			cmds = append(cmds, c.cancelTask(job.TaskID))
 		}
 	case key.Matches(msg, m.Keys.Delete):
 		if job := getSelectedJob(*m); job != nil && job.Status == model.StatusPending {
 			cmds = append(cmds, c.deleteJob(job.Name))
 		}
 	case key.Matches(msg, m.Keys.MarkFailed):
 		if job := getSelectedJob(*m); job != nil && job.Status == model.StatusRunning {
 			cmds = append(cmds, c.markFailed(job.Name))
 		}
 	case key.Matches(msg, m.Keys.Help):
 		m.ShowHelp = !m.ShowHelp
 	}
 	return cmds
 }
 func (c *Controller) applyWindowSize(msg tea.WindowSizeMsg, m model.State) model.State {
 	m.Width = msg.Width
 	m.Height = msg.Height
 	h, v := 4, 2
 	listHeight := msg.Height - v - 8
 	m.JobList.SetSize(msg.Width/3-h, listHeight)
 	panelWidth := msg.Width*2/3 - h - 2
 	panelHeight := (listHeight - 6) / 3
 	m.GpuView.Width = panelWidth
 	m.GpuView.Height = panelHeight
 	m.ContainerView.Width = panelWidth
 	m.ContainerView.Height = panelHeight
 	m.QueueView.Width = panelWidth
 	m.QueueView.Height = listHeight - 4
 	m.SettingsView.Width = panelWidth
 	m.SettingsView.Height = listHeight - 4
 	m.ExperimentsView.Width = panelWidth
 	m.ExperimentsView.Height = listHeight - 4
 	return m
 }
 func (c *Controller) handleJobsLoadedMsg(msg JobsLoadedMsg, m model.State) (model.State, tea.Cmd) {
 	m.Jobs = []model.Job(msg)
 	calculateJobStats(&m)
 	items := make([]list.Item, len(m.Jobs))
 	for i, job := range m.Jobs {
 		items[i] = job
 	}
 	setItemsCmd := m.JobList.SetItems(items)
 	m.Status = formatStatus(m)
 	m.IsLoading = false
 	return c.finalizeUpdate(msg, m, setItemsCmd)
 }
 func (c *Controller) handleTasksLoadedMsg(msg TasksLoadedMsg, m model.State) (model.State, tea.Cmd) {
 	m.QueuedTasks = []*model.Task(msg)
 	m.Status = formatStatus(m)
 	return c.finalizeUpdate(msg, m)
 }
 func (c *Controller) handleGPUContent(msg GpuLoadedMsg, m model.State) (model.State, tea.Cmd) {
 	m.GpuView.SetContent(string(msg))
 	m.GpuView.GotoTop()
 	return c.finalizeUpdate(msg, m)
 }
 func (c *Controller) handleContainerContent(msg ContainerLoadedMsg, m model.State) (model.State, tea.Cmd) {
 	m.ContainerView.SetContent(string(msg))
 	m.ContainerView.GotoTop()
 	return c.finalizeUpdate(msg, m)
 }
 func (c *Controller) handleQueueContent(msg QueueLoadedMsg, m model.State) (model.State, tea.Cmd) {
 	m.QueueView.SetContent(string(msg))
 	m.QueueView.GotoTop()
 	return c.finalizeUpdate(msg, m)
 }
 func (c *Controller) handleStatusMsg(msg StatusMsg, m model.State) (model.State, tea.Cmd) {
 	if msg.Level == "error" {
 		m.ErrorMsg = msg.Text
 		m.Status = "Error occurred - check status"
 	} else {
 		m.ErrorMsg = ""
 		m.Status = msg.Text
 	}
 	return c.finalizeUpdate(msg, m)
 }
 func (c *Controller) handleTickMsg(msg TickMsg, m model.State) (model.State, tea.Cmd) {
 	var cmds []tea.Cmd
 	if time.Since(m.LastRefresh) > 10*time.Second && !m.IsLoading {
 		m.LastRefresh = time.Now()
 		cmds = append(cmds, c.loadAllData())
 	}
 	cmds = append(cmds, tickCmd())
 	return c.finalizeUpdate(msg, m, cmds...)
 }
 func (c *Controller) finalizeUpdate(msg tea.Msg, m model.State, extraCmds ...tea.Cmd) (model.State, tea.Cmd) {
 	cmds := append([]tea.Cmd{}, extraCmds...)
 	var cmd tea.Cmd
 	m.JobList, cmd = m.JobList.Update(msg)
 	cmds = append(cmds, cmd)
 	m.GpuView, cmd = m.GpuView.Update(msg)
 	cmds = append(cmds, cmd)
 	m.ContainerView, cmd = m.ContainerView.Update(msg)
 	cmds = append(cmds, cmd)
 	m.QueueView, cmd = m.QueueView.Update(msg)
 	cmds = append(cmds, cmd)
 	m.ExperimentsView, cmd = m.ExperimentsView.Update(msg)
 	cmds = append(cmds, cmd)
 	var spinCmd tea.Cmd
 	m.Spinner, spinCmd = m.Spinner.Update(msg)
 	cmds = append(cmds, spinCmd)
 	return m, tea.Batch(cmds...)
 }
 // New creates a new Controller instance
 func New(cfg *config.Config, srv *services.MLServer, tq *services.TaskQueue, logger *logging.Logger) *Controller {
 	return &Controller{
@ -42,233 +294,38 @@ func (c *Controller) Init() tea.Cmd {
 // Update handles all messages and updates the state
 func (c *Controller) Update(msg tea.Msg, m model.State) (model.State, tea.Cmd) {
-	var cmds []tea.Cmd
+	switch typed := msg.(type) {
 	switch msg := msg.(type) {
 	case tea.KeyMsg:
-		// Handle input mode (for queuing jobs with args)
+		return c.handleKeyMsg(typed, m)
 		if m.InputMode {
 			switch msg.String() {
 			case "enter":
 				args := m.Input.Value()
 				m.Input.SetValue("")
 				m.InputMode = false
 				if job := getSelectedJob(m); job != nil {
 					cmds = append(cmds, c.queueJob(job.Name, args))
 				}
 				return m, tea.Batch(cmds...)
 			case "esc":
 				m.InputMode = false
 				m.Input.SetValue("")
 				return m, nil
 			}
 			var cmd tea.Cmd
 			m.Input, cmd = m.Input.Update(msg)
 			return m, cmd
 		}
 		// Handle settings-specific keys
 		if m.ActiveView == model.ViewModeSettings {
 			switch msg.String() {
 			case "up", "k":
 				if m.SettingsIndex > 1 { // Skip index 0 (Status)
 					m.SettingsIndex--
 					cmds = append(cmds, c.updateSettingsContent(m))
 					if m.SettingsIndex == 1 {
 						m.ApiKeyInput.Focus()
 					} else {
 						m.ApiKeyInput.Blur()
 					}
 				}
 			case "down", "j":
 				if m.SettingsIndex < 2 {
 					m.SettingsIndex++
 					cmds = append(cmds, c.updateSettingsContent(m))
 					if m.SettingsIndex == 1 {
 						m.ApiKeyInput.Focus()
 					} else {
 						m.ApiKeyInput.Blur()
 					}
 				}
 			case "enter":
 				if cmd := c.handleSettingsAction(&m); cmd != nil {
 					cmds = append(cmds, cmd)
 				}
 			case "esc":
 				m.ActiveView = model.ViewModeJobs
 				m.ApiKeyInput.Blur()
 			}
 			if m.SettingsIndex == 1 { // API Key input field
 				var cmd tea.Cmd
 				m.ApiKeyInput, cmd = m.ApiKeyInput.Update(msg)
 				cmds = append(cmds, cmd)
 				// Force update settings view to show typed characters immediately
 				cmds = append(cmds, c.updateSettingsContent(m))
 			}
 			return m, tea.Batch(cmds...)
 		}
 		// Handle global keys
 		switch {
 		case key.Matches(msg, m.Keys.Quit):
 			return m, tea.Quit
 		case key.Matches(msg, m.Keys.Refresh):
 			m.IsLoading = true
 			m.Status = "Refreshing all data..."
 			m.LastRefresh = time.Now()
 			cmds = append(cmds, c.loadAllData())
 		case key.Matches(msg, m.Keys.RefreshGPU):
 			m.Status = "Refreshing GPU status..."
 			cmds = append(cmds, c.loadGPU())
 		case key.Matches(msg, m.Keys.Trigger):
 			if job := getSelectedJob(m); job != nil {
 				cmds = append(cmds, c.queueJob(job.Name, ""))
 			}
 		case key.Matches(msg, m.Keys.TriggerArgs):
 			if job := getSelectedJob(m); job != nil {
 				m.InputMode = true
 				m.Input.Focus()
 			}
 		case key.Matches(msg, m.Keys.ViewQueue):
 			m.ActiveView = model.ViewModeQueue
 			cmds = append(cmds, c.showQueue(m))
 		case key.Matches(msg, m.Keys.ViewContainer):
 			m.ActiveView = model.ViewModeContainer
 			cmds = append(cmds, c.loadContainer())
 		case key.Matches(msg, m.Keys.ViewGPU):
 			m.ActiveView = model.ViewModeGPU
 			cmds = append(cmds, c.loadGPU())
 		case key.Matches(msg, m.Keys.ViewJobs):
 			m.ActiveView = model.ViewModeJobs
 		case key.Matches(msg, m.Keys.ViewSettings):
 			m.ActiveView = model.ViewModeSettings
 			m.SettingsIndex = 1 // Start at Input field, skip Status
 			m.ApiKeyInput.Focus()
 			cmds = append(cmds, c.updateSettingsContent(m))
 		case key.Matches(msg, m.Keys.ViewExperiments):
 			m.ActiveView = model.ViewModeExperiments
 			cmds = append(cmds, c.loadExperiments())
 		case key.Matches(msg, m.Keys.Cancel):
 			if job := getSelectedJob(m); job != nil && job.TaskID != "" {
 				cmds = append(cmds, c.cancelTask(job.TaskID))
 			}
 		case key.Matches(msg, m.Keys.Delete):
 			if job := getSelectedJob(m); job != nil && job.Status == model.StatusPending {
 				cmds = append(cmds, c.deleteJob(job.Name))
 			}
 		case key.Matches(msg, m.Keys.MarkFailed):
 			if job := getSelectedJob(m); job != nil && job.Status == model.StatusRunning {
 				cmds = append(cmds, c.markFailed(job.Name))
 			}
 		case key.Matches(msg, m.Keys.Help):
 			m.ShowHelp = !m.ShowHelp
 		}
 	case tea.WindowSizeMsg:
-		m.Width = msg.Width
+		updated := c.applyWindowSize(typed, m)
-		m.Height = msg.Height
+		return c.finalizeUpdate(msg, updated)
 		// Update component sizes
 		h, v := 4, 2 // docStyle.GetFrameSize() approx
 		listHeight := msg.Height - v - 8
 		m.JobList.SetSize(msg.Width/3-h, listHeight)
 		panelWidth := msg.Width*2/3 - h - 2
 		panelHeight := (listHeight - 6) / 3
 		m.GpuView.Width = panelWidth
 		m.GpuView.Height = panelHeight
 		m.ContainerView.Width = panelWidth
 		m.ContainerView.Height = panelHeight
 		m.QueueView.Width = panelWidth
 		m.QueueView.Height = listHeight - 4
 		m.SettingsView.Width = panelWidth
 		m.SettingsView.Height = listHeight - 4
 		m.ExperimentsView.Width = panelWidth
 		m.ExperimentsView.Height = listHeight - 4
 	case JobsLoadedMsg:
-		m.Jobs = []model.Job(msg)
+		return c.handleJobsLoadedMsg(typed, m)
 		calculateJobStats(&m)
 		items := make([]list.Item, len(m.Jobs))
 		for i, job := range m.Jobs {
 			items[i] = job
 		}
 		cmds = append(cmds, m.JobList.SetItems(items))
 		m.Status = formatStatus(m)
 		m.IsLoading = false
 	case TasksLoadedMsg:
-		m.QueuedTasks = []*model.Task(msg)
+		return c.handleTasksLoadedMsg(typed, m)
 		m.Status = formatStatus(m)
 	case GpuLoadedMsg:
-		m.GpuView.SetContent(string(msg))
+		return c.handleGPUContent(typed, m)
 		m.GpuView.GotoTop()
 	case ContainerLoadedMsg:
-		m.ContainerView.SetContent(string(msg))
+		return c.handleContainerContent(typed, m)
 		m.ContainerView.GotoTop()
 	case QueueLoadedMsg:
-		m.QueueView.SetContent(string(msg))
+		return c.handleQueueContent(typed, m)
 		m.QueueView.GotoTop()
 	case SettingsContentMsg:
-		m.SettingsView.SetContent(string(msg))
+		m.SettingsView.SetContent(string(typed))
-
+		return c.finalizeUpdate(msg, m)
 	case ExperimentsLoadedMsg:
-		m.ExperimentsView.SetContent(string(msg))
+		m.ExperimentsView.SetContent(string(typed))
 		m.ExperimentsView.GotoTop()
-
+		return c.finalizeUpdate(msg, m)
 	case SettingsUpdateMsg:
-		// Settings content was updated, just trigger a re-render
+		return c.finalizeUpdate(msg, m)
 	case StatusMsg:
-		if msg.Level == "error" {
+		return c.handleStatusMsg(typed, m)
 			m.ErrorMsg = msg.Text
 			m.Status = "Error occurred - check status"
 		} else {
 			m.ErrorMsg = ""
 			m.Status = msg.Text
 		}
 	case TickMsg:
-		var spinCmd tea.Cmd
+		return c.handleTickMsg(typed, m)
 		m.Spinner, spinCmd = m.Spinner.Update(msg)
 		cmds = append(cmds, spinCmd)
 		// Auto-refresh every 10 seconds
 		if time.Since(m.LastRefresh) > 10*time.Second && !m.IsLoading {
 			m.LastRefresh = time.Now()
 			cmds = append(cmds, c.loadAllData())
 		}
 		cmds = append(cmds, tickCmd())
 	default:
-		var spinCmd tea.Cmd
+		return c.finalizeUpdate(msg, m)
 		m.Spinner, spinCmd = m.Spinner.Update(msg)
 		cmds = append(cmds, spinCmd)
 	}
 	// Update all bubble components
 	var cmd tea.Cmd
 	m.JobList, cmd = m.JobList.Update(msg)
 	cmds = append(cmds, cmd)
 	m.GpuView, cmd = m.GpuView.Update(msg)
 	cmds = append(cmds, cmd)
 	m.ContainerView, cmd = m.ContainerView.Update(msg)
 	cmds = append(cmds, cmd)
 	m.QueueView, cmd = m.QueueView.Update(msg)
 	cmds = append(cmds, cmd)
 	m.ExperimentsView, cmd = m.ExperimentsView.Update(msg)
 	cmds = append(cmds, cmd)
 	return m, tea.Batch(cmds...)
 }
 // ExperimentsLoadedMsg is sent when experiments are loaded
--- a/cmd/tui/internal/controller/helpers.go
+++ b/cmd/tui/internal/controller/helpers.go
@ -19,6 +19,8 @@ func (c *Controller) getPathForStatus(status model.JobStatus) string {
 		return c.config.FinishedPath()
 	case model.StatusFailed:
 		return c.config.FailedPath()
 	case model.StatusQueued:
 		return c.config.PendingPath() // Queued jobs are in pending directory
 	}
 	return ""
 }
--- a/cmd/tui/internal/controller/settings.go
+++ b/cmd/tui/internal/controller/settings.go
@ -46,7 +46,7 @@ func (c *Controller) updateSettingsContent(m model.State) tea.Cmd {
 	inputContent := fmt.Sprintf("%s Enter New API Key\n%s",
 		getSettingsIndicator(m, 1),
-		m.ApiKeyInput.View())
+		m.APIKeyInput.View())
 	content.WriteString(inputStyle.Render(inputContent))
 	content.WriteString("\n")
@ -72,7 +72,7 @@ func (c *Controller) updateSettingsContent(m model.State) tea.Cmd {
 		Foreground(lipgloss.AdaptiveColor{Light: "#666", Dark: "#999"}).
 		Italic(true)
-	keyContent := fmt.Sprintf("Current API Key: %s", maskAPIKey(m.ApiKey))
+	keyContent := fmt.Sprintf("Current API Key: %s", maskAPIKey(m.APIKey))
 	content.WriteString(keyStyle.Render(keyContent))
 	return func() tea.Msg { return SettingsContentMsg(content.String()) }
@ -85,14 +85,15 @@ func (c *Controller) handleSettingsAction(m *model.State) tea.Cmd {
 	case 1: // Enter New API Key - do nothing, Enter key disabled
 		return nil
 	case 2: // Save Configuration
-		if m.ApiKeyInput.Value() != "" {
+		switch {
-			m.ApiKey = m.ApiKeyInput.Value()
+		case m.APIKeyInput.Value() != "":
-			m.ApiKeyInput.SetValue("")
+			m.APIKey = m.APIKeyInput.Value()
 			m.APIKeyInput.SetValue("")
 			m.Status = "Configuration saved (in-memory only)"
 			return c.updateSettingsContent(*m)
-		} else if m.ApiKey != "" {
+		case m.APIKey != "":
 			m.Status = "Configuration saved (in-memory only)"
-		} else {
+		default:
 			m.ErrorMsg = "No API key to save"
 		}
 	}
@ -109,8 +110,8 @@ func getSettingsIndicator(m model.State, index int) string {
 }
 func getAPIKeyStatus(m model.State) string {
-	if m.ApiKey != "" {
+	if m.APIKey != "" {
-		return "✓ API Key is set\n" + maskAPIKey(m.ApiKey)
+		return "✓ API Key is set\n" + maskAPIKey(m.APIKey)
 	}
 	return "⚠ No API Key configured"
 }
--- a/cmd/tui/internal/model/state.go
+++ b/cmd/tui/internal/model/state.go
@ -1,3 +1,4 @@
 // Package model provides TUI data structures and state management
 package model
 import (
@ -12,28 +13,33 @@ import (
 	"github.com/charmbracelet/lipgloss"
 )
 // ViewMode represents the current view mode in the TUI
 type ViewMode int
 // ViewMode constants represent different TUI views
 const (
-	ViewModeJobs ViewMode = iota
+	ViewModeJobs        ViewMode = iota // Jobs view mode
-	ViewModeGPU
+	ViewModeGPU                         // GPU status view mode
-	ViewModeQueue
+	ViewModeQueue                       // Queue status view mode
-	ViewModeContainer
+	ViewModeContainer                   // Container status view mode
-	ViewModeSettings
+	ViewModeSettings                    // Settings view mode
-	ViewModeDatasets
+	ViewModeDatasets                    // Datasets view mode
-	ViewModeExperiments
+	ViewModeExperiments                 // Experiments view mode
 )
 // JobStatus represents the status of a job
 type JobStatus string
 // JobStatus constants represent different job states
 const (
-	StatusPending  JobStatus = "pending"
+	StatusPending  JobStatus = "pending"  // Job is pending
-	StatusQueued   JobStatus = "queued"
+	StatusQueued   JobStatus = "queued"   // Job is queued
-	StatusRunning  JobStatus = "running"
+	StatusRunning  JobStatus = "running"  // Job is running
-	StatusFinished JobStatus = "finished"
+	StatusFinished JobStatus = "finished" // Job is finished
-	StatusFailed   JobStatus = "failed"
+	StatusFailed   JobStatus = "failed"   // Job is failed
 )
 // Job represents a job in the TUI
 type Job struct {
 	Name     string
 	Status   JobStatus
@ -41,7 +47,10 @@ type Job struct {
 	Priority int64
 }
 // Title returns the job title for display
 func (j Job) Title() string { return j.Name }
 // Description returns a formatted description with status icon
 func (j Job) Description() string {
 	icon := map[JobStatus]string{
 		StatusPending:  "⏸",
@ -56,8 +65,11 @@ func (j Job) Description() string {
 	}
 	return fmt.Sprintf("%s %s%s", icon, j.Status, pri)
 }
 // FilterValue returns the value used for filtering
 func (j Job) FilterValue() string { return j.Name }
 // Task represents a task in the TUI
 type Task struct {
 	ID        string            `json:"id"`
 	JobName   string            `json:"job_name"`
@ -71,6 +83,7 @@ type Task struct {
 	Metadata  map[string]string `json:"metadata,omitempty"`
 }
 // DatasetInfo represents dataset information in the TUI
 type DatasetInfo struct {
 	Name       string    `json:"name"`
 	SizeBytes  int64     `json:"size_bytes"`
@ -91,7 +104,7 @@ type State struct {
 	DatasetView     viewport.Model
 	ExperimentsView viewport.Model
 	Input           textinput.Model
-	ApiKeyInput     textinput.Model
+	APIKeyInput     textinput.Model
 	Status          string
 	ErrorMsg        string
 	InputMode       bool
@ -103,11 +116,12 @@ type State struct {
 	LastRefresh     time.Time
 	IsLoading       bool
 	JobStats        map[JobStatus]int
-	ApiKey          string
+	APIKey          string
 	SettingsIndex   int
 	Keys            KeyMap
 }
 // KeyMap defines key bindings for the TUI
 type KeyMap struct {
 	Refresh         key.Binding
 	Trigger         key.Binding
@ -127,6 +141,7 @@ type KeyMap struct {
 	Quit            key.Binding
 }
 // Keys contains the default key bindings for the TUI
 var Keys = KeyMap{
 	Refresh:         key.NewBinding(key.WithKeys("r"), key.WithHelp("r", "refresh all")),
 	Trigger:         key.NewBinding(key.WithKeys("t"), key.WithHelp("t", "queue job")),
@ -146,6 +161,7 @@ var Keys = KeyMap{
 	Quit:            key.NewBinding(key.WithKeys("q", "ctrl+c"), key.WithHelp("q", "quit")),
 }
 // InitialState creates the initial application state
 func InitialState(apiKey string) State {
 	items := []list.Item{}
 	delegate := list.NewDefaultDelegate()
@ -190,7 +206,7 @@ func InitialState(apiKey string) State {
 		DatasetView:     viewport.New(0, 0),
 		ExperimentsView: viewport.New(0, 0),
 		Input:           input,
-		ApiKeyInput:     apiKeyInput,
+		APIKeyInput:     apiKeyInput,
 		Status:          "Connected",
 		InputMode:       false,
 		ShowHelp:        false,
@ -199,7 +215,7 @@ func InitialState(apiKey string) State {
 		LastRefresh:     time.Now(),
 		IsLoading:       false,
 		JobStats:        make(map[JobStatus]int),
-		ApiKey:          apiKey,
+		APIKey:          apiKey,
 		SettingsIndex:   0,
 		Keys:            Keys,
 	}
--- a/cmd/tui/internal/services/services.go
+++ b/cmd/tui/internal/services/services.go
@ -1,3 +1,4 @@
 // Package services provides TUI service implementations
 package services
 import (
@ -18,6 +19,7 @@ type TaskQueue struct {
 	ctx        context.Context
 }
 // NewTaskQueue creates a new task queue service
 func NewTaskQueue(cfg *config.Config) (*TaskQueue, error) {
 	// Create internal queue config
 	queueCfg := queue.Config{
@ -42,6 +44,7 @@ func NewTaskQueue(cfg *config.Config) (*TaskQueue, error) {
 	}, nil
 }
 // EnqueueTask adds a new task to the queue
 func (tq *TaskQueue) EnqueueTask(jobName, args string, priority int64) (*model.Task, error) {
 	// Create internal task
 	internalTask := &queue.Task{
@ -62,12 +65,13 @@ func (tq *TaskQueue) EnqueueTask(jobName, args string, priority int64) (*model.T
 		JobName:   internalTask.JobName,
 		Args:      internalTask.Args,
 		Status:    "queued",
-		Priority:  int64(internalTask.Priority),
+		Priority:  internalTask.Priority,
 		CreatedAt: internalTask.CreatedAt,
 		Metadata:  internalTask.Metadata,
 	}, nil
 }
 // GetNextTask retrieves the next task from the queue
 func (tq *TaskQueue) GetNextTask() (*model.Task, error) {
 	internalTask, err := tq.internal.GetNextTask()
 	if err != nil {
@ -89,6 +93,7 @@ func (tq *TaskQueue) GetNextTask() (*model.Task, error) {
 	}, nil
 }
 // GetTask retrieves a specific task by ID
 func (tq *TaskQueue) GetTask(taskID string) (*model.Task, error) {
 	internalTask, err := tq.internal.GetTask(taskID)
 	if err != nil {
@ -107,6 +112,7 @@ func (tq *TaskQueue) GetTask(taskID string) (*model.Task, error) {
 	}, nil
 }
 // UpdateTask updates a task's status and metadata
 func (tq *TaskQueue) UpdateTask(task *model.Task) error {
 	// Convert to internal task
 	internalTask := &queue.Task{
@ -122,6 +128,7 @@ func (tq *TaskQueue) UpdateTask(task *model.Task) error {
 	return tq.internal.UpdateTask(internalTask)
 }
 // GetQueuedTasks retrieves all queued tasks
 func (tq *TaskQueue) GetQueuedTasks() ([]*model.Task, error) {
 	internalTasks, err := tq.internal.GetAllTasks()
 	if err != nil {
@ -145,6 +152,7 @@ func (tq *TaskQueue) GetQueuedTasks() ([]*model.Task, error) {
 	return tasks, nil
 }
 // GetJobStatus gets the status of all jobs with the given name
 func (tq *TaskQueue) GetJobStatus(jobName string) (map[string]string, error) {
 	// This method doesn't exist in internal queue, implement basic version
 	task, err := tq.internal.GetTaskByName(jobName)
@ -161,28 +169,35 @@ func (tq *TaskQueue) GetJobStatus(jobName string) (map[string]string, error) {
 	}, nil
 }
 // RecordMetric records a metric for monitoring
 func (tq *TaskQueue) RecordMetric(jobName, metric string, value float64) error {
 	_ = jobName // Parameter reserved for future use
 	return tq.internal.RecordMetric(jobName, metric, value)
 }
-func (tq *TaskQueue) GetMetrics(jobName string) (map[string]string, error) {
+// GetMetrics retrieves metrics for a job
 func (tq *TaskQueue) GetMetrics(_ string) (map[string]string, error) {
 	// This method doesn't exist in internal queue, return empty for now
 	return map[string]string{}, nil
 }
 // ListDatasets retrieves available datasets
 func (tq *TaskQueue) ListDatasets() ([]model.DatasetInfo, error) {
 	// This method doesn't exist in internal queue, return empty for now
 	return []model.DatasetInfo{}, nil
 }
 // CancelTask cancels a task by ID
 func (tq *TaskQueue) CancelTask(taskID string) error {
 	return tq.internal.CancelTask(taskID)
 }
 // ListExperiments retrieves experiment list
 func (tq *TaskQueue) ListExperiments() ([]string, error) {
 	return tq.expManager.ListExperiments()
 }
 // GetExperimentDetails retrieves experiment details
 func (tq *TaskQueue) GetExperimentDetails(commitID string) (string, error) {
 	meta, err := tq.expManager.ReadMetadata(commitID)
 	if err != nil {
@ -211,6 +226,7 @@ func (tq *TaskQueue) GetExperimentDetails(commitID string) (string, error) {
 	return output, nil
 }
 // Close closes the task queue
 func (tq *TaskQueue) Close() error {
 	return tq.internal.Close()
 }
@ -221,6 +237,7 @@ type MLServer struct {
 	addr string
 }
 // NewMLServer creates a new ML server connection
 func NewMLServer(cfg *config.Config) (*MLServer, error) {
 	// Local mode: skip SSH entirely
 	if cfg.Host == "" {
--- a/cmd/tui/internal/view/view.go
+++ b/cmd/tui/internal/view/view.go
@ -1,3 +1,4 @@
 // Package view provides TUI rendering functionality
 package view
 import (
@ -61,6 +62,7 @@ var (
 			Foreground(lipgloss.AdaptiveColor{Light: helpfgLight, Dark: helpfgDark}))
 )
 // Render renders the TUI view
 func Render(m model.State) string {
 	if m.Width == 0 {
 		return "Loading..."
@ -170,6 +172,14 @@ func getRightPanel(m model.State, width int) string {
 		style = activeBorderStyle
 		viewTitle = "🧪 Experiments"
 		content = m.ExperimentsView.View()
 	case model.ViewModeJobs:
 		style = activeBorderStyle
 		viewTitle = "📋 Job Details"
 		content = m.JobList.View()
 	case model.ViewModeDatasets:
 		style = activeBorderStyle
 		viewTitle = "📦 Datasets"
 		content = m.DatasetView.View()
 	default:
 		viewTitle = "📊 System Overview"
 		content = getOverviewPanel(m)
@ -251,5 +261,6 @@ func getQuickHelp(m model.State) string {
 	if m.ActiveView == model.ViewModeSettings {
 		return "  ↑/↓:move  enter:select  esc:exit settings  q:quit"
 	}
-	return "  h:help  1:jobs  2:datasets  3:experiments  v:queue  g:gpu  o:containers  s:settings  t:queue  r:refresh  q:quit"
+	return "  h:help  1:jobs  2:datasets  3:experiments v:queue g:gpu o:containers " +
 		"s:settings t:queue r:refresh q:quit"
 }
--- a/cmd/tui/main.go
+++ b/cmd/tui/main.go
@ -17,21 +17,25 @@ import (
 	"github.com/jfraeys/fetch_ml/internal/logging"
 )
 // AppModel represents the main application model for the TUI.
 type AppModel struct {
 	state      model.State
 	controller *controller.Controller
 }
 // Init initializes the TUI application.
 func (m AppModel) Init() tea.Cmd {
 	return m.controller.Init()
 }
 // Update handles application updates and messages.
 func (m AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 	newState, cmd := m.controller.Update(msg, m.state)
 	m.state = newState
 	return m, cmd
 }
 // View renders the TUI interface.
 func (m AppModel) View() string {
 	return view.Render(m.state)
 }
@ -39,7 +43,7 @@ func (m AppModel) View() string {
 func main() {
 	// Parse authentication flags
 	authFlags := auth.ParseAuthFlags()
-	if err := auth.ValidateAuthFlags(authFlags); err != nil {
+	if err := auth.ValidateFlags(authFlags); err != nil {
 		log.Fatalf("Authentication flag error: %v", err)
 	}
@ -60,36 +64,35 @@ func main() {
 	if err != nil {
 		if configFlag != "" {
 			log.Fatalf("Failed to load TOML config %s: %v", configFlag, err)
 		} else {
 			// Provide helpful error message for data scientists
 			log.Printf("=== Fetch ML TUI - Configuration Required ===")
 			log.Printf("")
 			log.Printf("Error: %v", err)
 			log.Printf("")
 			log.Printf("To get started with the TUI, you need to initialize your configuration:")
 			log.Printf("")
 			log.Printf("Option 1: Using the Zig CLI (Recommended)")
 			log.Printf("  1. Build the CLI: cd cli && make build")
 			log.Printf("  2. Initialize config: ./cli/zig-out/bin/ml init")
 			log.Printf("  3. Edit ~/.ml/config.toml with your settings")
 			log.Printf("  4. Run TUI: ./bin/tui")
 			log.Printf("")
 			log.Printf("Option 2: Manual Configuration")
 			log.Printf("  1. Create directory: mkdir -p ~/.ml")
 			log.Printf("  2. Create config: touch ~/.ml/config.toml")
 			log.Printf("  3. Add your settings to the file")
 			log.Printf("  4. Run TUI: ./bin/tui")
 			log.Printf("")
 			log.Printf("Example ~/.ml/config.toml:")
 			log.Printf("  worker_host = \"localhost\"")
 			log.Printf("  worker_user = \"your_username\"")
 			log.Printf("  worker_base = \"~/ml_jobs\"")
 			log.Printf("  worker_port = 22")
 			log.Printf("  api_key = \"your_api_key_here\"")
 			log.Printf("")
 			log.Printf("For more help, see: https://github.com/jfraeys/fetch_ml/docs")
 			os.Exit(1)
 		}
 		// Provide helpful error message for data scientists
 		log.Printf("=== Fetch ML TUI - Configuration Required ===")
 		log.Printf("")
 		log.Printf("Error: %v", err)
 		log.Printf("")
 		log.Printf("To get started with the TUI, you need to initialize your configuration:")
 		log.Printf("")
 		log.Printf("Option 1: Using the Zig CLI (Recommended)")
 		log.Printf("  1. Build the CLI: cd cli && make build")
 		log.Printf("  2. Initialize config: ./cli/zig-out/bin/ml init")
 		log.Printf("  3. Edit ~/.ml/config.toml with your settings")
 		log.Printf("  4. Run TUI: ./bin/tui")
 		log.Printf("")
 		log.Printf("Option 2: Manual Configuration")
 		log.Printf("  1. Create directory: mkdir -p ~/.ml")
 		log.Printf("  2. Create config: touch ~/.ml/config.toml")
 		log.Printf("  3. Add your settings to the file")
 		log.Printf("  4. Run TUI: ./bin/tui")
 		log.Printf("")
 		log.Printf("Example ~/.ml/config.toml:")
 		log.Printf("  worker_host = \"localhost\"")
 		log.Printf("  worker_user = \"your_username\"")
 		log.Printf("  worker_base = \"~/ml_jobs\"")
 		log.Printf("  worker_port = 22")
 		log.Printf("  api_key = \"your_api_key_here\"")
 		log.Printf("")
 		log.Printf("For more help, see: https://github.com/jfraeys/fetch_ml/docs")
 		os.Exit(1)
 	}
 	cfg = cliConfig.ToTUIConfig()
@ -108,11 +111,12 @@ func main() {
 	if cfg.Auth.Enabled {
 		// Use API key from CLI config if available, otherwise use from flags
 		var effectiveAPIKey string
-		if cliConfig != nil && cliConfig.APIKey != "" {
+		switch {
 		case cliConfig != nil && cliConfig.APIKey != "":
 			effectiveAPIKey = cliConfig.APIKey
-		} else if apiKey != "" {
+		case apiKey != "":
 			effectiveAPIKey = apiKey
-		} else {
+		default:
 			log.Fatal("Authentication required but no API key provided")
 		}
@ -133,7 +137,8 @@ func main() {
 	tq, err := services.NewTaskQueue(cfg)
 	if err != nil {
-		log.Fatalf("Failed to connect to Redis: %v", err)
+		log.Printf("Failed to connect to Redis: %v", err)
 		return
 	}
 	defer func() {
 		if err := tq.Close(); err != nil {
@ -194,11 +199,12 @@ func main() {
 	}()
 	if _, err := p.Run(); err != nil {
-		// Attempt to restore terminal before logging fatal error
+		// Attempt to restore terminal before logging error
-		p.ReleaseTerminal()
+		_ = p.ReleaseTerminal()
-		log.Fatalf("Error running TUI: %v", err)
+		log.Printf("Error running TUI: %v", err)
 		return
 	}
 	// Explicitly restore terminal after program exits
-	p.ReleaseTerminal()
+	_ = p.ReleaseTerminal()
 }
--- a/cmd/user_manager/main.go
+++ b/cmd/user_manager/main.go
@ -1,3 +1,4 @@
 // Package main implements the fetch_ml user management CLI
 package main
 import (
@ -11,8 +12,9 @@ import (
 	"gopkg.in/yaml.v3"
 )
 // ConfigWithAuth wraps auth configuration for user management.
 type ConfigWithAuth struct {
-	Auth auth.AuthConfig `yaml:"auth"`
+	Auth auth.Config `yaml:"auth"`
 }
 func main() {
@ -64,7 +66,7 @@ func main() {
 		if !adminStatus && *role == "" {
 			fmt.Printf("Make user '%s' an admin? (y/N): ", *username)
 			var response string
-			fmt.Scanln(&response)
+			_, _ = fmt.Scanln(&response)
 			adminStatus = strings.ToLower(strings.TrimSpace(response)) == "y"
 		}
--- a/cmd/worker/worker_config.go
+++ b/cmd/worker/worker_config.go
@ -2,13 +2,13 @@ package main
 import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"time"
 	"github.com/google/uuid"
 	"github.com/jfraeys/fetch_ml/internal/auth"
 	"github.com/jfraeys/fetch_ml/internal/config"
 	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"gopkg.in/yaml.v3"
 )
@ -17,24 +17,26 @@ const (
 	datasetCacheDefaultTTL      = 30 * time.Minute
 )
-// Config holds worker configuration
+// Config holds worker configuration.
 type Config struct {
-	Host          string `yaml:"host"`
+	Host          string                `yaml:"host"`
-	User          string `yaml:"user"`
+	User          string                `yaml:"user"`
-	SSHKey        string `yaml:"ssh_key"`
+	SSHKey        string                `yaml:"ssh_key"`
-	Port          int    `yaml:"port"`
+	Port          int                   `yaml:"port"`
-	BasePath      string `yaml:"base_path"`
+	BasePath      string                `yaml:"base_path"`
-	TrainScript   string `yaml:"train_script"`
+	TrainScript   string                `yaml:"train_script"`
-	RedisAddr     string `yaml:"redis_addr"`
+	RedisAddr     string                `yaml:"redis_addr"`
-	RedisPassword string `yaml:"redis_password"`
+	RedisPassword string                `yaml:"redis_password"`
-	RedisDB       int    `yaml:"redis_db"`
+	RedisDB       int                   `yaml:"redis_db"`
-	KnownHosts    string `yaml:"known_hosts"`
+	KnownHosts    string                `yaml:"known_hosts"`
-	WorkerID      string `yaml:"worker_id"`
+	WorkerID      string                `yaml:"worker_id"`
-	MaxWorkers    int    `yaml:"max_workers"`
+	MaxWorkers    int                   `yaml:"max_workers"`
-	PollInterval  int    `yaml:"poll_interval_seconds"`
+	PollInterval  int                   `yaml:"poll_interval_seconds"`
 	Resources     config.ResourceConfig `yaml:"resources"`
 	LocalMode     bool                  `yaml:"local_mode"`
 	// Authentication
-	Auth auth.AuthConfig `yaml:"auth"`
+	Auth auth.Config `yaml:"auth"`
 	// Metrics exporter
 	Metrics MetricsConfig `yaml:"metrics"`
@ -66,8 +68,9 @@ type MetricsConfig struct {
 	ListenAddr string `yaml:"listen_addr"`
 }
 // LoadConfig loads worker configuration from a YAML file.
 func LoadConfig(path string) (*Config, error) {
-	data, err := os.ReadFile(path)
+	data, err := fileutil.SecureFileRead(path)
 	if err != nil {
 		return nil, err
 	}
@ -98,8 +101,11 @@ func LoadConfig(path string) (*Config, error) {
 	if cfg.WorkerID == "" {
 		cfg.WorkerID = fmt.Sprintf("worker-%s", uuid.New().String()[:8])
 	}
-	if cfg.MaxWorkers == 0 {
+	cfg.Resources.ApplyDefaults()
-		cfg.MaxWorkers = smart.MaxWorkers()
+	if cfg.MaxWorkers > 0 {
 		cfg.Resources.MaxWorkers = cfg.MaxWorkers
 	} else {
 		cfg.MaxWorkers = cfg.Resources.MaxWorkers
 	}
 	if cfg.PollInterval == 0 {
 		cfg.PollInterval = smart.PollInterval()
@ -141,7 +147,7 @@ func LoadConfig(path string) (*Config, error) {
 	return &cfg, nil
 }
-// Validate implements config.Validator interface
+// Validate implements config.Validator interface.
 func (c *Config) Validate() error {
 	if c.Port != 0 {
 		if err := config.ValidatePort(c.Port); err != nil {
--- a/cmd/worker/worker_server.go
+++ b/cmd/worker/worker_server.go
@ -19,7 +19,8 @@ import (
 	"github.com/jfraeys/fetch_ml/internal/auth"
 	"github.com/jfraeys/fetch_ml/internal/config"
 	"github.com/jfraeys/fetch_ml/internal/container"
-	"github.com/jfraeys/fetch_ml/internal/errors"
+	"github.com/jfraeys/fetch_ml/internal/errtypes"
 	"github.com/jfraeys/fetch_ml/internal/fileutil"
 	"github.com/jfraeys/fetch_ml/internal/logging"
 	"github.com/jfraeys/fetch_ml/internal/metrics"
 	"github.com/jfraeys/fetch_ml/internal/network"
@ -30,19 +31,33 @@ import (
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )
-// MLServer wraps network.SSHClient for backward compatibility
+// MLServer wraps network.SSHClient for backward compatibility.
 type MLServer struct {
 	*network.SSHClient
 }
 // isValidName validates that input strings contain only safe characters.
 // isValidName checks if the input string is a valid name.
 func isValidName(input string) bool {
 	return len(input) > 0 && len(input) < 256
 }
 // NewMLServer creates a new ML server connection.
 // NewMLServer returns a new MLServer instance.
 func NewMLServer(cfg *Config) (*MLServer, error) {
 	if cfg.LocalMode {
 		return &MLServer{SSHClient: network.NewLocalClient(cfg.BasePath)}, nil
 	}
 	client, err := network.NewSSHClient(cfg.Host, cfg.User, cfg.SSHKey, cfg.Port, cfg.KnownHosts)
 	if err != nil {
 		return nil, err
 	}
 	return &MLServer{SSHClient: client}, nil
 }
 // Worker represents an ML task worker.
 type Worker struct {
 	id         string
 	config     *Config
@ -66,9 +81,9 @@ type Worker struct {
 	gracefulWait sync.WaitGroup
 }
-func (w *Worker) setupMetricsExporter() error {
+func (w *Worker) setupMetricsExporter() {
 	if !w.config.Metrics.Enabled {
-		return nil
+		return
 	}
 	reg := prometheus.NewRegistry()
@ -154,11 +169,10 @@ func (w *Worker) setupMetricsExporter() error {
 				"error", err)
 		}
 	}()
 	return nil
 }
-func NewWorker(cfg *Config, apiKey string) (*Worker, error) {
+// NewWorker creates a new worker instance.
 func NewWorker(cfg *Config, _ string) (*Worker, error) {
 	srv, err := NewMLServer(cfg)
 	if err != nil {
 		return nil, err
@ -205,13 +219,12 @@ func NewWorker(cfg *Config, apiKey string) (*Worker, error) {
 		shutdownCh:      make(chan struct{}),
 	}
-	if err := worker.setupMetricsExporter(); err != nil {
+	worker.setupMetricsExporter()
 		return nil, err
 	}
 	return worker, nil
 }
 // Start starts the worker's main processing loop.
 func (w *Worker) Start() {
 	w.logger.Info("worker started",
 		"worker_id", w.id,
@ -235,7 +248,8 @@ func (w *Worker) Start() {
 		}
 		queueStart := time.Now()
-		task, err := w.queue.GetNextTaskWithLease(w.config.WorkerID, w.config.TaskLeaseDuration)
+		blockTimeout := time.Duration(w.config.PollInterval) * time.Second
 		task, err := w.queue.GetNextTaskWithLeaseBlocking(w.config.WorkerID, w.config.TaskLeaseDuration, blockTimeout)
 		queueLatency := time.Since(queueStart)
 		if err != nil {
 			if err == context.DeadlineExceeded {
@ -289,7 +303,7 @@ func (w *Worker) heartbeat() {
 	}
 }
-// NEW: Fetch datasets using data_manager
+// NEW: Fetch datasets using data_manager.
 func (w *Worker) fetchDatasets(ctx context.Context, task *queue.Task) error {
 	logger := w.logger.Job(ctx, task.JobName, task.ID)
 	logger.Info("fetching datasets",
@ -315,6 +329,12 @@ func (w *Worker) fetchDatasets(ctx context.Context, task *queue.Task) error {
 		// Create command with context for cancellation support
 		cmdCtx, cancel := context.WithTimeout(ctx, 30*time.Minute)
 		// Validate inputs to prevent command injection
 		if !isValidName(task.JobName) || !isValidName(dataset) {
 			cancel()
 			return fmt.Errorf("invalid input: jobName or dataset contains unsafe characters")
 		}
 		//nolint:gosec // G204: Subprocess launched with potential tainted input - input is validated
 		cmd := exec.CommandContext(cmdCtx,
 			w.config.DataManagerPath,
 			"fetch",
@ -326,7 +346,7 @@ func (w *Worker) fetchDatasets(ctx context.Context, task *queue.Task) error {
 		cancel() // Clean up context
 		if err != nil {
-			return &errors.DataFetchError{
+			return &errtypes.DataFetchError{
 				Dataset: dataset,
 				JobName: task.JobName,
 				Err:     fmt.Errorf("command failed: %w, output: %s", err, output),
@ -342,10 +362,10 @@ func (w *Worker) fetchDatasets(ctx context.Context, task *queue.Task) error {
 	return nil
 }
-func (w *Worker) runJob(task *queue.Task) error {
+func (w *Worker) runJob(ctx context.Context, task *queue.Task) error {
 	// Validate job name to prevent path traversal
 	if err := container.ValidateJobName(task.JobName); err != nil {
-		return &errors.TaskExecutionError{
+		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "validation",
@ -354,14 +374,36 @@ func (w *Worker) runJob(task *queue.Task) error {
 	}
 	jobPaths := config.NewJobPaths(w.config.BasePath)
-	jobDir := filepath.Join(jobPaths.PendingPath(), task.JobName)
+	pendingDir := jobPaths.PendingPath()
 	jobDir := filepath.Join(pendingDir, task.JobName)
 	outputDir := filepath.Join(jobPaths.RunningPath(), task.JobName)
 	logFile := filepath.Join(outputDir, "output.log")
 	// Create pending directory
 	if err := os.MkdirAll(pendingDir, 0750); err != nil {
 		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "setup",
 			Err:     fmt.Errorf("failed to create pending dir: %w", err),
 		}
 	}
 	// Create job directory in pending
 	if err := os.MkdirAll(jobDir, 0750); err != nil {
 		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "setup",
 			Err:     fmt.Errorf("failed to create job dir: %w", err),
 		}
 	}
 	// Sanitize paths
-	jobDir, err := container.SanitizePath(jobDir)
+	var err error
 	jobDir, err = container.SanitizePath(jobDir)
 	if err != nil {
-		return &errors.TaskExecutionError{
+		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "validation",
@ -370,7 +412,7 @@ func (w *Worker) runJob(task *queue.Task) error {
 	}
 	outputDir, err = container.SanitizePath(outputDir)
 	if err != nil {
-		return &errors.TaskExecutionError{
+		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "validation",
@ -380,12 +422,12 @@ func (w *Worker) runJob(task *queue.Task) error {
 	// Create output directory
 	if _, err := telemetry.ExecWithMetrics(w.logger, "create output dir", 100*time.Millisecond, func() (string, error) {
-		if err := os.MkdirAll(outputDir, 0755); err != nil {
+		if err := os.MkdirAll(outputDir, 0750); err != nil {
 			return "", fmt.Errorf("mkdir failed: %w", err)
 		}
 		return "", nil
 	}); err != nil {
-		return &errors.TaskExecutionError{
+		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "setup",
@ -396,12 +438,18 @@ func (w *Worker) runJob(task *queue.Task) error {
 	// Move job from pending to running
 	stagingStart := time.Now()
 	if _, err := telemetry.ExecWithMetrics(w.logger, "stage job", 100*time.Millisecond, func() (string, error) {
 		// Remove existing directory if it exists
 		if _, err := os.Stat(outputDir); err == nil {
 			if err := os.RemoveAll(outputDir); err != nil {
 				return "", fmt.Errorf("remove existing failed: %w", err)
 			}
 		}
 		if err := os.Rename(jobDir, outputDir); err != nil {
 			return "", fmt.Errorf("rename failed: %w", err)
 		}
 		return "", nil
 	}); err != nil {
-		return &errors.TaskExecutionError{
+		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "setup",
@ -410,8 +458,87 @@ func (w *Worker) runJob(task *queue.Task) error {
 	}
 	stagingDuration := time.Since(stagingStart)
 	// In local mode, execute directly without podman
 	if w.config.LocalMode {
 		// Create experiment script
 		scriptContent := `#!/bin/bash
 set -e
 echo "Starting experiment: ` + task.JobName + `"
 echo "Task ID: ` + task.ID + `"
 echo "Timestamp: $(date)"
 # Simulate ML experiment
 echo "Loading data..."
 sleep 1
 echo "Training model..."
 sleep 2
 echo "Evaluating model..."
 sleep 1
 # Generate results
 ACCURACY=0.95
 LOSS=0.05
 EPOCHS=10
 echo ""
 echo "=== EXPERIMENT RESULTS ==="
 echo "Accuracy: $ACCURACY"
 echo "Loss: $LOSS"
 echo "Epochs: $EPOCHS"
 echo "Status: SUCCESS"
 echo "========================="
 echo "Experiment completed successfully!"
 `
 		scriptPath := filepath.Join(outputDir, "run.sh")
 		if err := os.WriteFile(scriptPath, []byte(scriptContent), 0755); err != nil {
 			return &errtypes.TaskExecutionError{
 				TaskID:  task.ID,
 				JobName: task.JobName,
 				Phase:   "execution",
 				Err:     fmt.Errorf("failed to write script: %w", err),
 			}
 		}
 		logFileHandle, err := fileutil.SecureOpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600)
 		if err != nil {
 			w.logger.Warn("failed to open log file for local output", "path", logFile, "error", err)
 			return &errtypes.TaskExecutionError{
 				TaskID:  task.ID,
 				JobName: task.JobName,
 				Phase:   "execution",
 				Err:     fmt.Errorf("failed to open log file: %w", err),
 			}
 		}
 		defer logFileHandle.Close()
 		// Execute the script directly
 		localCmd := exec.CommandContext(ctx, "bash", scriptPath)
 		localCmd.Stdout = logFileHandle
 		localCmd.Stderr = logFileHandle
 		w.logger.Info("executing local job",
 			"job", task.JobName,
 			"task_id", task.ID,
 			"script", scriptPath)
 		if err := localCmd.Run(); err != nil {
 			return &errtypes.TaskExecutionError{
 				TaskID:  task.ID,
 				JobName: task.JobName,
 				Phase:   "execution",
 				Err:     fmt.Errorf("execution failed: %w", err),
 			}
 		}
 		return nil
 	}
 	if w.config.PodmanImage == "" {
-		return &errors.TaskExecutionError{
+		return &errtypes.TaskExecutionError{
 			TaskID:  task.ID,
 			JobName: task.JobName,
 			Phase:   "validation",
@ -446,8 +573,8 @@ func (w *Worker) runJob(task *queue.Task) error {
 	}
 	ioBefore, ioErr := telemetry.ReadProcessIO()
-	podmanCmd := container.BuildPodmanCommand(podmanCfg, scriptPath, requirementsPath, extraArgs)
+	podmanCmd := container.BuildPodmanCommand(ctx, podmanCfg, scriptPath, requirementsPath, extraArgs)
-	logFileHandle, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
+	logFileHandle, err := fileutil.SecureOpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0600)
 	if err == nil {
 		podmanCmd.Stdout = logFileHandle
 		podmanCmd.Stderr = logFileHandle
@ -586,6 +713,7 @@ func (w *Worker) markDatasetFetched(dataset string) {
 	w.datasetCacheMu.Unlock()
 }
 // GetMetrics returns current worker metrics.
 func (w *Worker) GetMetrics() map[string]any {
 	stats := w.metrics.GetStats()
 	stats["worker_id"] = w.id
@ -593,6 +721,7 @@ func (w *Worker) GetMetrics() map[string]any {
 	return stats
 }
 // Stop gracefully shuts down the worker.
 func (w *Worker) Stop() {
 	w.cancel()
 	w.waitForTasks()
@ -614,7 +743,7 @@ func (w *Worker) Stop() {
 	w.logger.Info("worker stopped", "worker_id", w.id)
 }
-// Execute task with lease management and retry:
+// Execute task with lease management and retry.
 func (w *Worker) executeTaskWithLease(task *queue.Task) {
 	// Track task for graceful shutdown
 	w.gracefulWait.Add(1)
@ -695,7 +824,7 @@ func (w *Worker) executeTaskWithLease(task *queue.Task) {
 				execErr = fmt.Errorf("panic during execution: %v", r)
 			}
 		}()
-		execErr = w.runJob(task)
+		execErr = w.runJob(taskCtx, task)
 	}()
 	// Finalize task
@ -711,21 +840,30 @@ func (w *Worker) executeTaskWithLease(task *queue.Task) {
 				"task_id", task.ID,
 				"error", execErr,
 				"retry_count", task.RetryCount)
-			w.queue.RetryTask(task)
+			_ = w.queue.RetryTask(task)
 		} else {
 			task.Status = "failed"
-			w.queue.UpdateTaskWithMetrics(task, "final")
+			_ = w.queue.UpdateTaskWithMetrics(task, "final")
 		}
 	} else {
 		task.Status = "completed"
-		w.queue.UpdateTaskWithMetrics(task, "final")
+
 		// Read output file for completed tasks
 		jobPaths := config.NewJobPaths(w.config.BasePath)
 		outputDir := filepath.Join(jobPaths.RunningPath(), task.JobName)
 		logFile := filepath.Join(outputDir, "output.log")
 		if outputBytes, err := os.ReadFile(logFile); err == nil {
 			task.Output = string(outputBytes)
 		}
 		_ = w.queue.UpdateTaskWithMetrics(task, "final")
 	}
 	// Release lease
-	w.queue.ReleaseLease(task.ID, w.config.WorkerID)
+	_ = w.queue.ReleaseLease(task.ID, w.config.WorkerID)
 }
-// Heartbeat loop to renew lease:
+// Heartbeat loop to renew lease.
 func (w *Worker) heartbeatLoop(ctx context.Context, taskID string) {
 	ticker := time.NewTicker(w.config.HeartbeatInterval)
 	defer ticker.Stop()
@ -740,12 +878,12 @@ func (w *Worker) heartbeatLoop(ctx context.Context, taskID string) {
 				return
 			}
 			// Also update worker heartbeat
-			w.queue.Heartbeat(w.config.WorkerID)
+			_ = w.queue.Heartbeat(w.config.WorkerID)
 		}
 	}
 }
-// Graceful shutdown:
+// Shutdown gracefully shuts down the worker.
 func (w *Worker) Shutdown() error {
 	w.logger.Info("starting graceful shutdown", "active_tasks", w.countActiveTasks())
@ -768,9 +906,9 @@ func (w *Worker) Shutdown() error {
 	return w.queue.Close()
 }
-// Release all active leases:
+// Release all active leases.
 func (w *Worker) releaseAllLeases() {
-	w.activeTasks.Range(func(key, value interface{}) bool {
+	w.activeTasks.Range(func(key, _ interface{}) bool {
 		taskID := key.(string)
 		if err := w.queue.ReleaseLease(taskID, w.config.WorkerID); err != nil {
 			w.logger.Error("failed to release lease", "task_id", taskID, "error", err)
@ -779,7 +917,7 @@ func (w *Worker) releaseAllLeases() {
 	})
 }
-// Helper functions:
+// Helper functions.
 func (w *Worker) countActiveTasks() int {
 	count := 0
 	w.activeTasks.Range(func(_, _ interface{}) bool {
@ -816,7 +954,7 @@ func main() {
 	// Parse authentication flags
 	authFlags := auth.ParseAuthFlags()
-	if err := auth.ValidateAuthFlags(authFlags); err != nil {
+	if err := auth.ValidateFlags(authFlags); err != nil {
 		log.Fatalf("Authentication flag error: %v", err)
 	}
--- a/configs/config-debug.yaml
+++ b/configs/config-debug.yaml
@ -0,0 +1,17 @@
 base_path: "/app/data/experiments"
 auth:
  enabled: false
 server:
  address: ":9101"
 database:
  type: "sqlite"
  connection: "/app/data/experiments/fetch_ml.db"
 redis:
  url: "redis://redis:6379"
 logging:
  level: "debug"
--- a/configs/config-dev.yaml
+++ b/configs/config-dev.yaml
@ -1,36 +0,0 @@
 base_path: "./data/experiments"
 auth:
  enabled: true
  apikeys:
    test_user:
      hash: "02d4e2b0d8b4869a34511cc01ff1ebbc3cac581a6b361988106eaedca9886a38"
      admin: true
      roles: ["data_scientist", "admin"]
      permissions:
        read: true
        write: true
        delete: true
 server:
  address: ":9102"
  tls:
    enabled: false
 security:
  rate_limit:
    enabled: true
    requests_per_minute: 60
    burst_size: 10
  ip_whitelist:
    - "127.0.0.1"
    - "::1"
    - "localhost"
 redis:
  url: "redis://localhost:6379"
  password: "${REDIS_PASSWORD}"
 logging:
  level: "info"
  file: ""  # Empty = stderr only (dev mode)
--- a/configs/config-docker-full.yaml
+++ b/configs/config-docker-full.yaml
@ -0,0 +1,46 @@
 base_path: "/app/data/experiments"
 auth:
  enabled: true
  api_keys:
    homelab_user:
      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
      admin: true
      roles: ["user", "admin"]
      permissions:
        read: true
        write: true
        delete: true
 server:
  address: ":9101"
  tls:
    enabled: true
    cert_file: "/app/ssl/cert.pem"
    key_file: "/app/ssl/key.pem"
 security:
  rate_limit:
    enabled: true
    requests_per_minute: 30
  ip_whitelist: []
 # SQLite database for persistence
 database:
  type: "sqlite"
  connection: "/app/data/fetch_ml.db"
 redis:
  url: "redis://redis:6379"
  max_connections: 10
 logging:
  level: "info"
  file: "/app/logs/app.log"
  audit_file: "/app/logs/audit.log"
 resources:
  max_workers: 1
  desired_rps_per_worker: 2
  podman_cpus: "2"
  podman_memory: "8g"
--- a/configs/config-docker.yaml
+++ b/configs/config-docker.yaml
@ -37,3 +37,9 @@ logging:
  level: "info"
  file: "/app/logs/app.log"
  audit_file: "/app/logs/audit.log"
 resources:
  max_workers: 1
  desired_rps_per_worker: 2
  podman_cpus: "2"
  podman_memory: "8g"
--- a/configs/config-homelab-secure.yaml
+++ b/configs/config-homelab-secure.yaml
@ -0,0 +1,86 @@
 base_path: "/app/data/experiments"
 auth:
  enabled: true
  api_keys:
    homelab_user:
      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
      admin: true
      roles: ["user", "admin"]
      permissions:
        read: true
        write: true
        delete: true
 server:
  address: ":9101"
  tls:
    enabled: true
    cert_file: "/app/ssl/cert.pem"
    key_file: "/app/ssl/key.pem"
    min_version: "1.3"
    cipher_suites:
      - "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384"
      - "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384"
      - "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256"
      - "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256"
 security:
  rate_limit:
    enabled: true
    requests_per_minute: 30
    burst_size: 10
  ip_whitelist: []  # Open for homelab use, consider restricting
  cors:
    enabled: true
    allowed_origins:
      - "https://localhost:9103"
      - "https://localhost:3000"  # Grafana
    allowed_methods: ["GET", "POST", "PUT", "DELETE", "OPTIONS"]
    allowed_headers: ["Content-Type", "Authorization"]
  csrf:
    enabled: true
  security_headers:
    X-Content-Type-Options: "nosniff"
    X-Frame-Options: "DENY"
    X-XSS-Protection: "1; mode=block"
    Strict-Transport-Security: "max-age=31536000; includeSubDomains"
 # SQLite database with security settings
 database:
  type: "sqlite"
  connection: "/app/data/experiments/fetch_ml.db"
  max_connections: 10
  connection_timeout: "30s"
  max_idle_time: "1h"
 redis:
  url: "redis://redis:6379"
  max_connections: 10
  connection_timeout: "10s"
  read_timeout: "5s"
  write_timeout: "5s"
 logging:
  level: "info"
  file: "/app/logs/app.log"
  audit_file: "/app/logs/audit.log"
  max_size: "100MB"
  max_backups: 5
  compress: true
 resources:
  max_workers: 2
  desired_rps_per_worker: 3
  podman_cpus: "2"
  podman_memory: "4g"
  job_timeout: "30m"
  cleanup_interval: "1h"
 monitoring:
  enabled: true
  metrics_path: "/metrics"
  health_check_interval: "30s"
  prometheus:
    enabled: true
    listen_addr: ":9100"
--- a/configs/config-local.yaml
+++ b/configs/config-local.yaml
@ -1,33 +0,0 @@
 auth:
    enabled: true
    apikeys:
        dev_user:
            hash: 2baf1f40105d9501fe319a8ec463fdf4325a2a5df445adf3f572f626253678c9
            admin: true
            roles:
                - admin
            permissions:
                '*': true
 server:
    address: ":9101"
    tls:
        enabled: false
 security:
    rate_limit:
        enabled: false
    ip_whitelist:
        - "127.0.0.1"
        - "::1"
        - "localhost"
        - "10.0.0.0/8"
        - "192.168.0.0/16"
        - "172.16.0.0/12"
 # Prometheus metrics
 metrics:
    enabled: true
    listen_addr: ":9100"
    tls:
        enabled: false
--- a/configs/config-multi-user.yaml
+++ b/configs/config-multi-user.yaml
@ -0,0 +1,78 @@
 base_path: "/app/data/experiments"
 auth:
  enabled: true
  api_keys:
    admin_user:
      hash: "5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8" # "password"
      admin: true
      roles: ["user", "admin"]
      permissions:
        read: true
        write: true
        delete: true
    researcher1:
      hash: "ef92b778ba7a6c8f2150019a5678047b6a9a2b95cef8189518f9b35c54d2e3ae" # "research123"
      admin: false
      roles: ["user", "researcher"]
      permissions:
        jobs:read: true
        jobs:create: true
        jobs:update: true
        jobs:delete: false
    analyst1:
      hash: "a665a45920422f9d417e4867efdc4fb8a04a1f3fff1fa07e998e86f7f7a27ae3" # "analyst123"
      admin: false
      roles: ["user", "analyst"]
      permissions:
        jobs:read: true
        jobs:create: false
        jobs:update: false
        jobs:delete: false
 server:
  address: ":9101"
  tls:
    enabled: false
 security:
  rate_limit:
    enabled: true
    requests_per_minute: 60
    burst_size: 20
  ip_whitelist: []
  cors:
    enabled: true
    allowed_origins: ["https://localhost:9103", "https://localhost:3000"]
    allowed_methods: ["GET", "POST", "PUT", "DELETE", "OPTIONS"]
    allowed_headers: ["Content-Type", "Authorization"]
 database:
  type: "sqlite"
  connection: "/app/data/experiments/fetch_ml.db"
  max_connections: 20
  connection_timeout: "30s"
 redis:
  url: "redis://redis:6379"
  max_connections: 15
  connection_timeout: "10s"
 logging:
  level: "info"
  file: "/app/logs/app.log"
  max_size: "100MB"
  max_backups: 5
  compress: true
 resources:
  max_workers: 3
  desired_rps_per_worker: 3
  podman_cpus: "2"
  podman_memory: "4g"
  job_timeout: "30m"
 monitoring:
  enabled: true
  metrics_path: "/metrics"
  health_check_interval: "30s"
--- a/configs/config-no-tls.yaml
+++ b/configs/config-no-tls.yaml
@ -1,27 +0,0 @@
 base_path: "./data/experiments"
 auth:
  enabled: true
 server:
  address: ":9102"
  tls:
    enabled: false
 security:
  rate_limit:
    enabled: true
    requests_per_minute: 60
    burst_size: 10
  ip_whitelist:
    - "127.0.0.1"
    - "::1"
    - "localhost"
 redis:
  url: "redis://localhost:6379"
  password: "${REDIS_PASSWORD}"
 logging:
  level: "info"
  file: "./logs/fetch_ml.log"
--- a/configs/config-prod.yaml
+++ b/configs/config-prod.yaml
@ -51,3 +51,9 @@ logging:
  level: "info"
  file: "logs/fetch_ml.log"
  audit_log: "logs/audit.log"
 resources:
  max_workers: 2
  desired_rps_per_worker: 5
  podman_cpus: "8"
  podman_memory: "32g"
--- a/configs/schema/worker_config_schema.yaml
+++ b/configs/schema/worker_config_schema.yaml
@ -0,0 +1,106 @@
 $schema: "http://json-schema.org/draft-07/schema#"
 title: "FetchML Worker Configuration"
 type: object
 additionalProperties: false
 required:
  - base_path
  - worker_id
  - redis_addr
  - podman_image
  - container_workspace
  - container_results
  - train_script
 properties:
  host:
    type: string
  user:
    type: string
  ssh_key:
    type: string
  port:
    type: integer
    minimum: 1
    maximum: 65535
  base_path:
    type: string
  train_script:
    type: string
  redis_addr:
    type: string
  redis_password:
    type: string
  redis_db:
    type: integer
    minimum: 0
  known_hosts:
    type: string
  worker_id:
    type: string
    minLength: 1
  max_workers:
    type: integer
    minimum: 1
  poll_interval_seconds:
    type: integer
    minimum: 1
  resources:
    type: object
    additionalProperties: false
    properties:
      max_workers:
        type: integer
        minimum: 1
      desired_rps_per_worker:
        type: integer
        minimum: 1
      requests_per_sec:
        type: integer
        minimum: 1
      podman_cpus:
        type: string
      podman_memory:
        type: string
      request_burst:
        type: integer
        minimum: 1
  auth:
    type: object
    additionalProperties: true
  metrics:
    type: object
    additionalProperties: false
    properties:
      enabled:
        type: boolean
      listen_addr:
        type: string
  metrics_flush_interval:
    type: string
    description: Duration string (e.g., "500ms")
  data_manager_path:
    type: string
  auto_fetch_data:
    type: boolean
  data_dir:
    type: string
  dataset_cache_ttl:
    type: string
    description: Duration string (e.g., "24h")
  podman_image:
    type: string
    minLength: 1
  container_workspace:
    type: string
  container_results:
    type: string
  gpu_access:
    type: boolean
  task_lease_duration:
    type: string
  heartbeat_interval:
    type: string
  max_retries:
    type: integer
    minimum: 0
  graceful_timeout:
    type: string
--- a/configs/worker-docker.yaml
+++ b/configs/worker-docker.yaml
@ -0,0 +1,51 @@
 # Worker configuration for Docker production-like testing
 worker_id: "docker-test-worker-1"
 # Redis configuration
 redis:
  url: "redis://redis:6379"
  max_connections: 10
 # Local mode settings
 local_mode: false  # Use Podman for containerized job execution
 # Job paths
 base_path: "/tmp/fetchml-jobs"
 # Container workspace (not used in local mode)
 container_workspace: "/workspace"
 container_results: "/results"
 # Podman settings (not used in local mode)
 podman_image: "python:3.9-slim"
 podman_cpus: "2"
 podman_memory: "4g"
 # Worker configuration
 heartbeat_interval: "30s"
 lease_duration: "5m"
 max_concurrent_tasks: 1
 # Data manager settings
 data_manager:
  enabled: false
  base_path: "/data"
 # SSH settings for Podman communication
 ssh:
  enabled: true
  host: "localhost"
  port: 2222
  user: "worker"
  password: "SecureWorkerPass2024!"
  key_path: "/home/worker/.ssh/id_rsa"
 # Logging
 logging:
  level: "info"
  file: "/logs/worker.log"
 # Metrics
 metrics:
  enabled: true
  endpoint: ":9100"
--- a/configs/worker-homelab-secure.yaml
+++ b/configs/worker-homelab-secure.yaml
@ -0,0 +1,79 @@
 # Worker configuration for Homelab secure environment
 worker_id: "homelab-secure-worker-1"
 # Redis configuration with connection pooling
 redis:
  url: "redis://redis:6379"
  max_connections: 10
  connection_timeout: "10s"
  read_timeout: "5s"
  write_timeout: "5s"
 # Local mode disabled for containerized execution
 local_mode: false
 # Job paths with security considerations
 base_path: "/tmp/fetchml-jobs"
 container_workspace: "/workspace"
 container_results: "/results"
 # Podman settings with resource limits
 podman_image: "python:3.11-slim"
 podman_cpus: "2"
 podman_memory: "4g"
 podman_network: "ml-job-network"
 podman_timeout: "30m"
 # Worker configuration with security
 heartbeat_interval: "30s"
 lease_duration: "5m"
 max_concurrent_tasks: 2
 task_timeout: "30m"
 # Data manager settings
 data_manager:
  enabled: true
  base_path: "/data"
  encryption_enabled: true
  backup_enabled: true
 # SSH settings with secure configuration
 ssh:
  enabled: true
  host: "localhost"
  port: 2222
  user: "worker"
  password: "HomelabWorker2024!"
  key_path: "/home/worker/.ssh/id_rsa"
  max_retries: 3
  connection_timeout: "30s"
  strict_host_key_checking: false
 # Logging with rotation and security
 logging:
  level: "info"
  file: "/logs/worker.log"
  max_size: "50MB"
  max_backups: 5
  compress: true
  audit_enabled: true
 # Metrics and monitoring
 metrics:
  enabled: true
  endpoint: ":9100"
  path: "/metrics"
 # Security settings
 security:
  enable_job_isolation: true
  sandbox_enabled: true
  resource_monitoring: true
  audit_commands: true
 # Health check configuration
 health_check:
  enabled: true
  interval: "30s"
  timeout: "10s"
  failure_threshold: 3
--- a/configs/worker-prod.toml
+++ b/configs/worker-prod.toml
@ -20,6 +20,12 @@ container_workspace = "/workspace"
 container_results = "/results"
 train_script = "train.py"
 [resources]
 max_workers = 4
 desired_rps_per_worker = 2
 podman_cpus = "4"
 podman_memory = "16g"
 # Dataset management
 auto_fetch_data = true
 data_dir = "/data/datasets"
--- a/docker-compose.homelab-secure.yml
+++ b/docker-compose.homelab-secure.yml
@ -0,0 +1,104 @@
 # Homelab Secure Docker Environment
 services:
  redis:
    image: redis:7-alpine
    container_name: ml-homelab-redis
    ports:
      - "6379:6379"
    volumes:
      - redis_homelab_data:/data
    restart: unless-stopped
    command: >
      redis-server 
      --appendonly yes 
      --requirepass "HomelabRedis2024!"
      --maxmemory 512mb
      --maxmemory-policy allkeys-lru
    healthcheck:
      test: ["CMD", "redis-cli", "-a", "HomelabRedis2024!", "ping"]
      interval: 30s
      timeout: 10s
      retries: 3
    networks:
      - ml-homelab-network
  api-server:
    build:
      context: .
      dockerfile: build/docker/homelab-secure.Dockerfile
    container_name: ml-homelab-api
    ports:
      - "9104:9101"  # API server port
      - "2223:2222"  # Secure SSH port
      - "9101:9100"  # Prometheus metrics
    volumes:
      - ./data:/app/data/experiments
      - ./logs:/logs
      - ./configs/config-homelab-secure.yaml:/app/configs/config.yaml
    depends_on:
      redis:
        condition: service_healthy
    restart: unless-stopped
    environment:
      - REDIS_URL=redis://:HomelabRedis2024!@redis:6379
      - LOG_LEVEL=info
      - TZ=America/New_York
    healthcheck:
      test: ["CMD", "curl", "-k", "-f", "https://localhost:9101/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    command: >
      sh -c "
        sudo /app/start-security.sh &
        /usr/local/bin/api-server -config /app/configs/config.yaml
      "
    networks:
      - ml-homelab-network
  worker:
    build:
      context: .
      dockerfile: build/docker/homelab-secure.Dockerfile
    container_name: ml-homelab-worker
    volumes:
      - ./data:/app/data/experiments
      - ./logs:/logs
      - ./configs/worker-homelab-secure.yaml:/app/configs/worker.yaml
    depends_on:
      redis:
        condition: service_healthy
      api-server:
        condition: service_healthy
    restart: unless-stopped
    environment:
      - REDIS_URL=redis://:HomelabRedis2024!@redis:6379
      - LOG_LEVEL=info
      - TZ=America/New_York
    privileged: true  # Required for Podman
    security_opt:
      - no-new-privileges:true
    cap_drop:
      - ALL
    cap_add:
      - NET_ADMIN
      - SYS_ADMIN
    command: >
      sh -c "
        sudo /app/start-security.sh &
        /usr/local/bin/worker -config /app/configs/worker.yaml
      "
    networks:
      - ml-homelab-network
 volumes:
  redis_homelab_data:
    driver: local
 networks:
  ml-homelab-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.25.0.0/16
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@ -0,0 +1,74 @@
 # Full Production Docker Environment with Podman and SQLite
 services:
  redis:
    image: redis:7-alpine
    container_name: ml-prod-redis
    ports:
      - "6379:6379"
    volumes:
      - redis_prod_data:/data
    restart: unless-stopped
    command: redis-server --appendonly yes
    healthcheck:
      test: [ "CMD", "redis-cli", "ping" ]
      interval: 30s
      timeout: 10s
      retries: 3
  api-server:
    build:
      context: .
      dockerfile: build/docker/secure-prod.Dockerfile
    container_name: ml-prod-api
    ports:
      - "9103:9101"  # API server port
      - "2222:2222"  # Secure SSH port for Podman communication
      - "9100:9100"  # Prometheus metrics
    volumes:
      - ./data:/app/data/experiments
      - ./logs:/logs
      - ./configs/config-multi-user.yaml:/app/configs/config.yaml
    depends_on:
      redis:
        condition: service_healthy
    restart: unless-stopped
    environment:
      - REDIS_URL=redis://redis:6379
      - LOG_LEVEL=info
    healthcheck:
      test: [ "CMD", "curl", "-k", "https://localhost:9101/health" ]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    # Start SSH daemon for Podman communication
    command: ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
  worker:
    build:
      context: .
      dockerfile: build/docker/secure-prod.Dockerfile
    container_name: ml-prod-worker
    volumes:
      - ./data:/app/data/experiments
      - ./logs:/logs
      - ./configs/worker-docker.yaml:/app/configs/worker.yaml
    depends_on:
      redis:
        condition: service_healthy
      api-server:
        condition: service_healthy
    restart: unless-stopped
    environment:
      - REDIS_URL=redis://redis:6379
      - LOG_LEVEL=info
    privileged: true  # Required for Podman to work in Docker
    command: ["/usr/local/bin/worker", "-config", "/app/configs/worker.yaml"]
 volumes:
  redis_prod_data:
    driver: local
 networks:
  default:
    name: ml-prod-network
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -72,8 +72,7 @@ services:
    volumes:
      - grafana_data:/var/lib/grafana
      - ./monitoring/grafana/provisioning:/etc/grafana/provisioning
-      - ./monitoring/grafana-dashboard.json:/var/lib/grafana/dashboards/ml-queue.json
+      - ./monitoring/dashboards:/var/lib/grafana/dashboards
      - ./monitoring/logs-dashboard.json:/var/lib/grafana/dashboards/logs.json
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
--- a/docs/_config.yml
+++ b/docs/_config.yml
@ -45,6 +45,8 @@ nav:
        url: "/contributing/"
      - title: "API Reference"
        url: "/api/"
      - title: "Performance Monitoring"
        url: "/performance-monitoring/"
 # Collections
 collections:
--- a/docs/_site/404.html
+++ b/docs/_site/404.html
@ -332,6 +332,28 @@
      <li class="md-tabs__item">
        <a href="/adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1381,6 +1403,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="/adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="/adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="/adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="/adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/adr/ADR-001-use-go-for-api-server/index.html
+++ b/docs/_site/adr/ADR-001-use-go-for-api-server/index.html
--- a/docs/_site/adr/ADR-002-use-sqlite-for-local-development/index.html
+++ b/docs/_site/adr/ADR-002-use-sqlite-for-local-development/index.html
--- a/docs/_site/adr/ADR-003-use-redis-for-job-queue/index.html
+++ b/docs/_site/adr/ADR-003-use-redis-for-job-queue/index.html
--- a/docs/_site/adr/index.html
+++ b/docs/_site/adr/index.html
--- a/docs/_site/api-key-process/index.html
+++ b/docs/_site/api-key-process/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1542,6 +1564,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/architecture/index.html
+++ b/docs/_site/architecture/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -2115,6 +2137,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/cicd/index.html
+++ b/docs/_site/cicd/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1654,6 +1676,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/cli-reference/index.html
+++ b/docs/_site/cli-reference/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1853,6 +1875,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/configuration-schema/index.html
+++ b/docs/_site/configuration-schema/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1481,6 +1503,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/deployment/index.html
+++ b/docs/_site/deployment/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1971,6 +1993,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/development-setup/index.html
+++ b/docs/_site/development-setup/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1508,6 +1530,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/environment-variables/index.html
+++ b/docs/_site/environment-variables/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1608,6 +1630,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/first-experiment/index.html
+++ b/docs/_site/first-experiment/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1664,6 +1686,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/index.html
+++ b/docs/_site/index.html
@ -341,6 +341,28 @@
      <li class="md-tabs__item">
        <a href="adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1553,6 +1575,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/docs/_site/installation/index.html
+++ b/docs/_site/installation/index.html
@ -343,6 +343,28 @@
      <li class="md-tabs__item">
        <a href="../adr/" class="md-tabs__link">
  Architecture Decisions
        </a>
      </li>
    </ul>
  </div>
 </nav>
@ -1514,6 +1536,172 @@
    <li class="md-nav__item md-nav__item--nested">
        <input class="md-nav__toggle md-toggle md-toggle--indeterminate" type="checkbox" id="__nav_7" >
          <div class="md-nav__link md-nav__container">
            <a href="../adr/" class="md-nav__link ">
  <span class="md-ellipsis">
    Architecture Decisions
  </span>
            </a>
              <label class="md-nav__link " for="__nav_7" id="__nav_7_label" tabindex="0">
                <span class="md-nav__icon md-icon"></span>
              </label>
          </div>
        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
          <label class="md-nav__title" for="__nav_7">
            <span class="md-nav__icon md-icon"></span>
    Architecture Decisions
          </label>
          <ul class="md-nav__list" data-md-scrollfix>
    <li class="md-nav__item">
      <a href="../adr/ADR-001-use-go-for-api-server/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-001: Use Go for API Server
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-002-use-sqlite-for-local-development/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-002: Use SQLite for Local Development
  </span>
      </a>
    </li>
    <li class="md-nav__item">
      <a href="../adr/ADR-003-use-redis-for-job-queue/" class="md-nav__link">
  <span class="md-ellipsis">
    ADR-003: Use Redis for Job Queue
  </span>
      </a>
    </li>
          </ul>
        </nav>
    </li>
  </ul>
 </nav>
                  </div>
--- a/Show more
+++ b/Show more