From 355d2e311a2e1492f03df10afb1694901a1f044b Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Mon, 16 Feb 2026 20:38:57 -0500 Subject: [PATCH] docs: update README and CHANGELOG - Update project documentation with latest features - Update manage-artifacts.sh script --- CHANGELOG.md | 10 +- README.md | 48 +---- scripts/manage-artifacts.sh | 360 ++++++++++++++++++------------------ 3 files changed, 196 insertions(+), 222 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 685a5c7..24347b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,13 @@ ## [Unreleased] -- Deployments: production now terminates TLS/WSS at Caddy (reverse proxy) and keeps the API server on internal HTTP/WS. +### Security +- Native: fix buffer overflow vulnerabilities in `dataset_hash` (replaced `strcpy` with `strncpy` + null termination) +- Native: fix unsafe `memcpy` in `queue_index` priority queue (added explicit null terminators for string fields) +- Native: add path traversal protection in `queue_index` storage (rejects `..` and null bytes in queue directory paths) +- Native: add mmap size limits (100MB max) to prevent unbounded memory mapping exposure +- Native: modularize C++ libraries with clean layering (common, queue_index, dataset_hash) + +### Added - Tests: add e2e coverage for `wss://` upgrade through a TLS-terminating reverse proxy. - Worker: verify `dataset_specs[].checksum` when provided and fail tasks on mismatch. - Worker: verify `snapshot_id` using `snapshot_sha256` and fail-closed (supports local `data_dir/snapshots/` and optional S3-backed `snapshot_store`). @@ -12,3 +19,4 @@ - Worker: export env prewarm hit/miss/built counters and total build time via the worker Prometheus metrics endpoint. - API/Worker: `ml prune` also triggers best-effort garbage collection of warmed env images. - API: add `/health/ok` (when health checks are enabled) and wrap HTTP handlers with Prometheus HTTP request metrics when Prometheus is enabled. 
+- CLI/API: add `ml logs` command to fetch and follow job logs from running or completed experiments via WebSocket. diff --git a/README.md b/README.md index 68cf2d1..6e2f3e7 100644 --- a/README.md +++ b/README.md @@ -115,35 +115,17 @@ See `CHANGELOG.md`. ## Build -### Native C++ Libraries (Optional Performance Optimization) +### Native C++ Libraries (Optional) -FetchML includes optional C++ native libraries for performance-critical operations: -- **dataset_hash**: mmap + SIMD SHA256 hashing (78% syscall reduction) -- **queue_index**: Binary index format (96% syscall reduction) -- **artifact_scanner**: Fast directory traversal (87% syscall reduction) -- **streaming_io**: Parallel gzip extraction (95% syscall reduction) - -**Requirements:** CMake 3.15+, C++17 compiler, zlib +FetchML includes optional C++ native libraries for performance. See `docs/src/native-libraries.md` for detailed build instructions. +Quick start: ```bash -# Build native libraries -make native-build # Development build -make native-release # Production optimized (-O3) -make native-debug # Debug build with ASan - -# Enable native libraries at runtime -export FETCHML_NATIVE_LIBS=1 - -# Build Go binaries with native library support -make prod-with-native # Copies .so/.dylib files to bin/ +make native-build # Build native libs +make native-smoke # Run smoke test +export FETCHML_NATIVE_LIBS=1 # Enable at runtime ``` -**Deployment:** Ship the native libraries alongside your Go binaries: -- Linux: `lib*.so` files -- macOS: `lib*.dylib` files - -The libraries are loaded dynamically via cgo. If not found, FetchML falls back to pure Go implementations. 
- ### Standard Build ```bash @@ -164,6 +146,7 @@ make cross-platform # builds for Linux/macOS/Windows ## Docs See `docs/` for detailed guides: +- `docs/src/native-libraries.md` – Native C++ libraries (build, test, deploy) - `docs/src/zig-cli.md` – CLI reference - `docs/src/quick-start.md` – Full setup guide - `docs/src/deployment.md` – Production deployment @@ -174,23 +157,6 @@ The FetchML source code is intentionally not hosted on GitHub. The canonical source repository is available at: ``. -## Contributing - -Contributions are welcome. - -- **Questions / bug reports**: Use GitHub Issues: ``. Include: - - how to reproduce - - expected vs actual behavior - - logs/config snippets (sanitize secrets) - - OS + versions (Go, Zig, Podman/Docker if relevant) -- **Changes**: Submit a patch in a GitHub issue. - - Create a topic branch. - - Run tests/linters. - - Export your change as either: - - a patch series: `git format-patch -N origin/main`, or - - a single bundle: `git bundle create fetchml.bundle origin/main..HEAD` - - Attach the generated files to a GitHub issue at ``. - ## License FetchML is source-available for transparency and auditability. It is not open-source. diff --git a/scripts/manage-artifacts.sh b/scripts/manage-artifacts.sh index 706229f..94a1d8b 100755 --- a/scripts/manage-artifacts.sh +++ b/scripts/manage-artifacts.sh @@ -14,149 +14,149 @@ ARCHIVE_DIR="$LOCAL_ARTIFACTS_DIR/archive" mkdir -p "$LOCAL_ARTIFACTS_DIR" case "${1:-help}" in - "list") - echo "=== Benchmark Runs ===" +"list") + echo "=== Benchmark Runs ===" - # Fast empty-state: no run directories. - if ! compgen -G "$LOCAL_ARTIFACTS_DIR/run_*" >/dev/null; then - echo "(no runs found)" - exit 0 - fi + # Fast empty-state: no run directories. + if ! compgen -G "$LOCAL_ARTIFACTS_DIR/run_*" >/dev/null; then + echo "(no runs found)" + exit 0 + fi - # List newest-first without parsing `ls -l` output. - for run_dir in $(ls -1dt "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null); do - if [ ! 
-d "$run_dir" ]; then - continue - fi + # List newest-first (ls -t sorts by mtime; -d names the run directories themselves) without parsing `ls -l` output. + for run_dir in $(ls -1dt "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null); do + if [ ! -d "$run_dir" ]; then + continue + fi - timestamp=$(basename "$run_dir" | sed 's/run_//') - echo "Run: $timestamp" - echo " Path: $run_dir" - if [ -f "$run_dir/report.html" ]; then - echo " Report: $run_dir/report.html" - fi - if [ -f "$run_dir/prometheus_metrics.txt" ]; then - metrics_count=$(grep -c "benchmark_" "$run_dir/prometheus_metrics.txt" 2>/dev/null || echo "0") - echo " Metrics: $metrics_count benchmarks" - fi - echo "" - done - ;; - - "clean") - echo "=== Cleaning Artifacts ===" - case "${2:-all}" in - "all") - echo "Archiving all artifacts..." - stamp=$(date -u +%Y%m%d-%H%M%S) - mkdir -p "$ARCHIVE_DIR/$stamp" - mv "$LOCAL_ARTIFACTS_DIR"/run_* "$ARCHIVE_DIR/$stamp"/ 2>/dev/null || true - echo "All artifacts archived" - ;; - "old") - keep_count="${3:-10}" - echo "Keeping last $keep_count runs, archiving older ones..." 
- stamp=$(date -u +%Y%m%d-%H%M%S) - mkdir -p "$ARCHIVE_DIR/$stamp" - cd "$LOCAL_ARTIFACTS_DIR" - ls -1t run_* 2>/dev/null | tail -n +$((keep_count + 1)) | while read -r run; do - echo "Archiving: $run" - mv "$run" "$ARCHIVE_DIR/$stamp/" 2>/dev/null || true - done - ;; - "run") - run_id="${3:-}" - if [ -z "$run_id" ]; then - echo "Usage: $0 clean run " - echo "Available runs:" - ls -1 "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null | sed 's/.*run_//' || echo "No runs found" - exit 1 - fi - run_dir="$LOCAL_ARTIFACTS_DIR/run_$run_id" - if [ -d "$run_dir" ]; then - echo "Archiving run: $run_id" - stamp=$(date -u +%Y%m%d-%H%M%S) - mkdir -p "$ARCHIVE_DIR/$stamp" - mv "$run_dir" "$ARCHIVE_DIR/$stamp/" 2>/dev/null || true - else - echo "Run not found: $run_id" - fi - ;; - *) - echo "Usage: $0 clean [all|old|run ]" - exit 1 - ;; - esac - ;; - - "compare") - run1="${2:-}" - run2="${3:-}" - if [ -z "$run1" ] || [ -z "$run2" ]; then - echo "Usage: $0 compare " - echo "Available runs:" - ls -1 "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null | sed 's/.*run_//' || echo "No runs found" - exit 1 - fi - - echo "=== Comparing Runs ===" - echo "Run 1: $run1" - echo "Run 2: $run2" - echo "" - - metrics1="$LOCAL_ARTIFACTS_DIR/run_$run1/prometheus_metrics.txt" - metrics2="$LOCAL_ARTIFACTS_DIR/run_$run2/prometheus_metrics.txt" - - if [ ! -f "$metrics1" ] || [ ! 
-f "$metrics2" ]; then - echo "One or both runs not found" - exit 1 - fi - - echo "Benchmark Comparison:" - printf "%-40s %-15s %-15s %-10s\n" "Benchmark" "Run 1 (ns)" "Run 2 (ns)" "Change" - printf "%-40s %-15s %-15s %-10s\n" "--------" "----------" "----------" "------" - - grep "benchmark_time_per_op" "$metrics1" | while read -r line1; do - benchmark=$(echo "$line1" | sed 's/.*benchmark="\([^"]*\)".*/\1/') - value1=$(echo "$line1" | awk '{print $2}') - - line2=$(grep "benchmark_time_per_op.*benchmark=\"$benchmark\"" "$metrics2" || true) - if [ -n "$line2" ]; then - value2=$(echo "$line2" | awk '{print $2}') - - # Calculate percentage change - if [ "$value1" != "0" ]; then - change=$(echo "scale=2; (($value2 - $value1) / $value1) * 100" | bc 2>/dev/null || echo "N/A") - printf "%-40s %-15s %-15s %-10s\n" "$benchmark" "$value1" "$value2" "${change}%" - else - printf "%-40s %-15s %-15s %-10s\n" "$benchmark" "$value1" "$value2" "N/A" - fi - fi - done - ;; - - "export") - run_id="${2:-}" - format="${3:-json}" - if [ -z "$run_id" ]; then - echo "Usage: $0 export [json|csv]" - echo "Available runs:" - ls -1 "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null | sed 's/.*run_//' || echo "No runs found" - exit 1 - fi - - run_dir="$LOCAL_ARTIFACTS_DIR/run_$run_id" - if [ ! -d "$run_dir" ]; then - echo "Run not found: $run_id" - exit 1 - fi - - output_file="$LOCAL_ARTIFACTS_DIR/export_${run_id}.$format" - - case "$format" in - "json") - echo "Exporting run $run_id to JSON..." 
- python3 -c " + timestamp=$(basename "$run_dir" | sed 's/run_//') + echo "Run: $timestamp" + echo " Path: $run_dir" + if [ -f "$run_dir/report.html" ]; then + echo " Report: $run_dir/report.html" + fi + if [ -f "$run_dir/prometheus_metrics.txt" ]; then + metrics_count=$(grep -c "benchmark_" "$run_dir/prometheus_metrics.txt" 2>/dev/null || true) # grep -c already prints 0 (exit 1) when nothing matches + echo " Metrics: $metrics_count benchmarks" + fi + echo "" + done + ;; + +"clean") + echo "=== Cleaning Artifacts ===" + case "${2:-all}" in + "all") + echo "Archiving all artifacts..." + stamp=$(date -u +%Y%m%d-%H%M%S) + mkdir -p "$ARCHIVE_DIR/$stamp" + mv "$LOCAL_ARTIFACTS_DIR"/run_* "$ARCHIVE_DIR/$stamp"/ 2>/dev/null || true + echo "All artifacts archived" + ;; + "old") + keep_count="${3:-10}" + echo "Keeping last $keep_count runs, archiving older ones..." + stamp=$(date -u +%Y%m%d-%H%M%S) + mkdir -p "$ARCHIVE_DIR/$stamp" + cd "$LOCAL_ARTIFACTS_DIR" + ls -1dt run_* 2>/dev/null | tail -n +$((keep_count + 1)) | while read -r run; do # -d: emit the run directory names, not their contents + echo "Archiving: $run" + mv "$run" "$ARCHIVE_DIR/$stamp/" 2>/dev/null || true + done + ;; + "run") + run_id="${3:-}" + if [ -z "$run_id" ]; then + echo "Usage: $0 clean run " + echo "Available runs:" + ls -1 "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null | sed 's/.*run_//' || echo "No runs found" + exit 1 + fi + run_dir="$LOCAL_ARTIFACTS_DIR/run_$run_id" + if [ -d "$run_dir" ]; then + echo "Archiving run: $run_id" + stamp=$(date -u +%Y%m%d-%H%M%S) + mkdir -p "$ARCHIVE_DIR/$stamp" + mv "$run_dir" "$ARCHIVE_DIR/$stamp/" 2>/dev/null || true + else + echo "Run not found: $run_id" + fi + ;; + *) + echo "Usage: $0 clean [all|old|run ]" + exit 1 + ;; + esac + ;; + +"compare") + run1="${2:-}" + run2="${3:-}" + if [ -z "$run1" ] || [ -z "$run2" ]; then + echo "Usage: $0 compare " + echo "Available runs:" + ls -1 "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null | sed 's/.*run_//' || echo "No runs found" + exit 1 + fi + + echo "=== Comparing Runs ===" + echo "Run 1: $run1" + echo "Run 2: $run2" + echo "" + + 
metrics1="$LOCAL_ARTIFACTS_DIR/run_$run1/prometheus_metrics.txt" + metrics2="$LOCAL_ARTIFACTS_DIR/run_$run2/prometheus_metrics.txt" + + if [ ! -f "$metrics1" ] || [ ! -f "$metrics2" ]; then + echo "One or both runs not found" + exit 1 + fi + + echo "Benchmark Comparison:" + printf "%-40s %-15s %-15s %-10s\n" "Benchmark" "Run 1 (ns)" "Run 2 (ns)" "Change" + printf "%-40s %-15s %-15s %-10s\n" "--------" "----------" "----------" "------" + + grep "benchmark_time_per_op" "$metrics1" | while read -r line1; do + benchmark=$(echo "$line1" | sed 's/.*benchmark="\([^"]*\)".*/\1/') + value1=$(echo "$line1" | awk '{print $2}') + + line2=$(grep "benchmark_time_per_op.*benchmark=\"$benchmark\"" "$metrics2" || true) + if [ -n "$line2" ]; then + value2=$(echo "$line2" | awk '{print $2}') + + # Calculate percentage change + if [ "$value1" != "0" ]; then + change=$(echo "scale=2; (($value2 - $value1) / $value1) * 100" | bc 2>/dev/null || echo "N/A") + printf "%-40s %-15s %-15s %-10s\n" "$benchmark" "$value1" "$value2" "${change}%" + else + printf "%-40s %-15s %-15s %-10s\n" "$benchmark" "$value1" "$value2" "N/A" + fi + fi + done + ;; + +"export") + run_id="${2:-}" + format="${3:-json}" + if [ -z "$run_id" ]; then + echo "Usage: $0 export [json|csv]" + echo "Available runs:" + ls -1 "$LOCAL_ARTIFACTS_DIR"/run_* 2>/dev/null | sed 's/.*run_//' || echo "No runs found" + exit 1 + fi + + run_dir="$LOCAL_ARTIFACTS_DIR/run_$run_id" + if [ ! -d "$run_dir" ]; then + echo "Run not found: $run_id" + exit 1 + fi + + output_file="$LOCAL_ARTIFACTS_DIR/export_${run_id}.$format" + + case "$format" in + "json") + echo "Exporting run $run_id to JSON..." + python3 -c " import json import sys @@ -182,44 +182,44 @@ export_data = { with open('$output_file', 'w') as f: json.dump(export_data, f, indent=2) " - ;; - "csv") - echo "Exporting run $run_id to CSV..." 
- echo "benchmark,time_per_op_ns,run_id" > "$output_file" - grep "benchmark_time_per_op" "$run_dir/prometheus_metrics.txt" | while read -r line; do - benchmark=$(echo "$line" | sed 's/.*benchmark="\([^"]*\)".*/\1/') - value=$(echo "$line" | awk '{print $2}') - echo "$benchmark,$value,$run_id" >> "$output_file" - done - ;; - *) - echo "Unsupported format: $format" - exit 1 - ;; - esac - - echo "Exported to: $output_file" - ;; - - "help"|*) - echo "Artifact Management Tool" - echo "" - echo "Usage: $0 [args]" - echo "" - echo "Commands:" - echo " list List all benchmark runs" - echo " clean [all|old|run] Archive artifacts" - echo " all Archive all artifacts" - echo " old [count] Keep last N runs (default: 10)" - echo " run Remove specific run" - echo " compare Compare two benchmark runs" - echo " export [format] Export run data (json|csv)" - echo " help Show this help" - echo "" - echo "Examples:" - echo " $0 list" - echo " $0 clean old 5" - echo " $0 compare 20241204_220000 20241204_230000" - echo " $0 export 20241204_220000 json" - ;; + ;; + "csv") + echo "Exporting run $run_id to CSV..." 
+ echo "benchmark,time_per_op_ns,run_id" >"$output_file" + grep "benchmark_time_per_op" "$run_dir/prometheus_metrics.txt" | while read -r line; do + benchmark=$(echo "$line" | sed 's/.*benchmark="\([^"]*\)".*/\1/') + value=$(echo "$line" | awk '{print $2}') + echo "$benchmark,$value,$run_id" >>"$output_file" + done + ;; + *) + echo "Unsupported format: $format" + exit 1 + ;; + esac + + echo "Exported to: $output_file" + ;; + +"help" | *) + echo "Artifact Management Tool" + echo "" + echo "Usage: $0 [args]" + echo "" + echo "Commands:" + echo " list List all benchmark runs" + echo " clean [all|old|run] Archive artifacts" + echo " all Archive all artifacts" + echo " old [count] Keep last N runs (default: 10)" + echo " run Remove specific run" + echo " compare Compare two benchmark runs" + echo " export [format] Export run data (json|csv)" + echo " help Show this help" + echo "" + echo "Examples:" + echo " $0 list" + echo " $0 clean old 5" + echo " $0 compare 20241204_220000 20241204_230000" + echo " $0 export 20241204_220000 json" + ;; esac