ci: align workflows, build scripts, and docs with current architecture
This commit is contained in:
parent
dab680a60d
commit
94112f0af5
21 changed files with 649 additions and 342 deletions
12
.github/workflows/benchmark-metrics.yml
vendored
12
.github/workflows/benchmark-metrics.yml
vendored
|
|
@ -1,12 +1,6 @@
|
|||
name: Benchmark Metrics
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, develop ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
schedule:
|
||||
- cron: '0 6 * * *' # Daily at 6 AM UTC
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
|
|
@ -65,12 +59,14 @@ jobs:
|
|||
done < clean_benchmarks.txt
|
||||
|
||||
- name: Push to Prometheus Pushgateway
|
||||
env:
|
||||
PROMETHEUS_PUSHGATEWAY_URL: ${{ secrets['PROMETHEUS_PUSHGATEWAY_URL'] }}
|
||||
run: |
|
||||
# Push metrics to Prometheus Pushgateway (if configured)
|
||||
if [ -n "${{ secrets.PROMETHEUS_PUSHGATEWAY_URL }}" ]; then
|
||||
if [ -n "$PROMETHEUS_PUSHGATEWAY_URL" ]; then
|
||||
echo "Pushing metrics to Prometheus..."
|
||||
curl --data-binary @prometheus_metrics.txt \
|
||||
"${{ secrets.PROMETHEUS_PUSHGATEWAY_URL }}/metrics/job/benchmark/instance/${{ github.run_id }}"
|
||||
"$PROMETHEUS_PUSHGATEWAY_URL/metrics/job/benchmark/instance/${{ github.run_id }}"
|
||||
else
|
||||
echo "PROMETHEUS_PUSHGATEWAY_URL not configured, skipping push"
|
||||
fi
|
||||
|
|
|
|||
24
.github/workflows/ci.yml
vendored
24
.github/workflows/ci.yml
vendored
|
|
@ -1,10 +1,7 @@
|
|||
name: CI/CD Pipeline
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main, develop ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
workflow_dispatch:
|
||||
|
||||
# Concurrency control to prevent multiple runs of the same workflow
|
||||
concurrency:
|
||||
|
|
@ -85,18 +82,12 @@ jobs:
|
|||
|
||||
- name: Run tests
|
||||
run: make test
|
||||
env:
|
||||
REDIS_URL: redis://localhost:6379
|
||||
|
||||
- name: Test internal/queue package
|
||||
run: go test -v -race -coverprofile=queue-coverage.out ./internal/queue/...
|
||||
env:
|
||||
REDIS_URL: redis://localhost:6379
|
||||
|
||||
- name: Run comprehensive tests
|
||||
run: make test-all
|
||||
env:
|
||||
REDIS_URL: redis://localhost:6379
|
||||
|
||||
- name: Run linters
|
||||
run: make lint
|
||||
|
|
@ -111,6 +102,19 @@ jobs:
|
|||
flags: unittests
|
||||
name: codecov-umbrella
|
||||
|
||||
dev-smoke:
|
||||
name: Dev Compose Smoke Test
|
||||
runs-on: ubuntu-latest
|
||||
needs: test
|
||||
timeout-minutes: 20
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Run dev smoke test
|
||||
run: make dev-smoke
|
||||
|
||||
build:
|
||||
name: Build
|
||||
runs-on: ubuntu-latest
|
||||
|
|
|
|||
3
.github/workflows/label.yml
vendored
3
.github/workflows/label.yml
vendored
|
|
@ -1,8 +1,7 @@
|
|||
name: Label Pull Request
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, edited, synchronize]
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
label:
|
||||
|
|
|
|||
10
.github/workflows/license-check.yml
vendored
10
.github/workflows/license-check.yml
vendored
|
|
@ -21,13 +21,7 @@ jobs:
|
|||
echo "LICENSE file is missing"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check if it's MIT license
|
||||
if ! grep -q "MIT License" LICENSE; then
|
||||
echo "License file should be MIT License"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
echo "License file OK"
|
||||
|
||||
- name: Check Go files for license headers
|
||||
|
|
@ -36,7 +30,7 @@ jobs:
|
|||
missing_headers=0
|
||||
|
||||
for file in $(find . -name "*.go" -not -path "./vendor/*" -not -path "./.git/*"); do
|
||||
if ! head -10 "$file" | grep -q "Copyright" && ! head -10 "$file" | grep -q "MIT"; then
|
||||
if ! head -10 "$file" | grep -q "Copyright"; then
|
||||
echo "Missing license header in: $file"
|
||||
missing_headers=$((missing_headers + 1))
|
||||
fi
|
||||
|
|
|
|||
36
.github/workflows/release.yml
vendored
36
.github/workflows/release.yml
vendored
|
|
@ -1,13 +1,12 @@
|
|||
name: Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'v*' # Trigger on version tags like v1.0.0
|
||||
workflow_dispatch:
|
||||
|
||||
permissions:
|
||||
contents: write
|
||||
packages: write
|
||||
id-token: write
|
||||
|
||||
env:
|
||||
GO_VERSION: '1.25.0'
|
||||
|
|
@ -113,10 +112,10 @@ jobs:
|
|||
- name: Package binaries
|
||||
run: |
|
||||
cd dist
|
||||
for binary in api-server worker tui data_manager user_manager; do
|
||||
for binary in fetch_ml_*; do
|
||||
if [[ -f "${binary}" ]]; then
|
||||
tar -czf "fetch_ml_${binary}.tar.gz" "${binary}"
|
||||
sha256sum "fetch_ml_${binary}.tar.gz" > "fetch_ml_${binary}.tar.gz.sha256"
|
||||
tar -czf "${binary}.tar.gz" "${binary}"
|
||||
sha256sum "${binary}.tar.gz" > "${binary}.tar.gz.sha256"
|
||||
fi
|
||||
done
|
||||
|
||||
|
|
@ -155,6 +154,20 @@ jobs:
|
|||
sha256sum *.tar.gz > checksums.txt
|
||||
ls -lh
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@v3
|
||||
|
||||
- name: Sign checksums.txt (keyless)
|
||||
working-directory: release
|
||||
env:
|
||||
COSIGN_YES: "true"
|
||||
run: |
|
||||
cosign sign-blob \
|
||||
--output-signature checksums.txt.sig \
|
||||
--output-certificate checksums.txt.cert \
|
||||
checksums.txt
|
||||
ls -lh checksums.txt checksums.txt.sig checksums.txt.cert
|
||||
|
||||
- name: Create Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
|
|
@ -170,15 +183,12 @@ jobs:
|
|||
- **`ml-macos-arm64.tar.gz`** - macOS Apple Silicon
|
||||
|
||||
### Go Backend Binaries
|
||||
- **`fetch_ml_api-server.tar.gz`** - API Server
|
||||
- **`fetch_ml_worker.tar.gz`** - Worker
|
||||
- **`fetch_ml_tui.tar.gz`** - Terminal UI
|
||||
- **`fetch_ml_data_manager.tar.gz`** - Data Manager
|
||||
- **`fetch_ml_user_manager.tar.gz`** - User Manager
|
||||
|
||||
Included as per-platform tarballs named:
|
||||
- `fetch_ml_<component>_<os>_<arch>[.exe].tar.gz`
|
||||
|
||||
### Installation
|
||||
```bash
|
||||
# Download and extract
|
||||
# Download and extract (example: CLI)
|
||||
tar -xzf ml-<platform>.tar.gz
|
||||
|
||||
# Make executable and move to PATH
|
||||
|
|
|
|||
134
.golangci.yml
134
.golangci.yml
|
|
@ -4,27 +4,11 @@ run:
|
|||
timeout: 5m
|
||||
tests: true
|
||||
output:
|
||||
format: colored-line-number
|
||||
linters-settings:
|
||||
govet:
|
||||
enable:
|
||||
- shadow
|
||||
- fieldalignment
|
||||
gocyclo:
|
||||
min-complexity: 15
|
||||
dupl:
|
||||
threshold: 100
|
||||
goconst:
|
||||
min-len: 3
|
||||
min-occurrences: 3
|
||||
misspell:
|
||||
locale: US
|
||||
lll:
|
||||
line-length: 100
|
||||
revive:
|
||||
confidence: 0.8
|
||||
formats:
|
||||
text:
|
||||
path: stdout
|
||||
linters:
|
||||
disable-all: true
|
||||
default: none
|
||||
enable:
|
||||
- bodyclose
|
||||
- dogsled
|
||||
|
|
@ -51,40 +35,84 @@ linters:
|
|||
- unused
|
||||
- whitespace
|
||||
- revive
|
||||
settings:
|
||||
nolintlint:
|
||||
allow-unused: true
|
||||
govet:
|
||||
enable:
|
||||
- shadow
|
||||
- fieldalignment
|
||||
gocyclo:
|
||||
min-complexity: 15
|
||||
dupl:
|
||||
threshold: 100
|
||||
goconst:
|
||||
min-len: 3
|
||||
min-occurrences: 3
|
||||
misspell:
|
||||
locale: US
|
||||
ignore-rules:
|
||||
- cancelled
|
||||
- cancelling
|
||||
- behaviour
|
||||
- colour
|
||||
- sanitise
|
||||
- initialise
|
||||
- optimise
|
||||
- normalised
|
||||
lll:
|
||||
line-length: 100
|
||||
revive:
|
||||
confidence: 0.8
|
||||
exclusions:
|
||||
rules:
|
||||
# G306: File permissions - acceptable for test files and scripts
|
||||
- text: "G306:"
|
||||
linters:
|
||||
- gosec
|
||||
# Exclude linters for test files
|
||||
- path: ".*_test\\.go"
|
||||
linters:
|
||||
- gocyclo
|
||||
- errcheck
|
||||
- dupl
|
||||
- lll
|
||||
- gosec
|
||||
- revive
|
||||
# Exclude errcheck for tests directory
|
||||
- path: "^tests/"
|
||||
linters:
|
||||
- errcheck
|
||||
# approve insecureSkipVerify in test files
|
||||
- path: _test\\.go
|
||||
text: "insecureSkipVerify"
|
||||
linters:
|
||||
- gosec
|
||||
# Exclude gosec G204 for tests and tools via source match
|
||||
- source: "exec\\.CommandContext"
|
||||
path: "(tests|tools)/"
|
||||
linters:
|
||||
- gosec
|
||||
# Exclude revive for api package naming via source match
|
||||
- source: "^package api$"
|
||||
linters:
|
||||
- revive
|
||||
# Known legacy staticcheck issue
|
||||
- path: "^internal/worker/snapshot_store\\.go$"
|
||||
text: "SA1019"
|
||||
linters:
|
||||
- staticcheck
|
||||
# Known legacy unparam issue
|
||||
- path: "^internal/resources/manager\\.go$"
|
||||
text: "gpuSlotsForTask"
|
||||
linters:
|
||||
- unparam
|
||||
# Ignore whitespace-only lint noise in this file
|
||||
- path: "^internal/api/ws_jobs\\.go$"
|
||||
linters:
|
||||
- whitespace
|
||||
issues:
|
||||
exclude-rules:
|
||||
# G306: File permissions - acceptable for test files and scripts
|
||||
- text: "G306:"
|
||||
linters:
|
||||
- gosec
|
||||
# Exclude linters for test files
|
||||
- path: ".*_test\\.go"
|
||||
linters:
|
||||
- gocyclo
|
||||
- errcheck
|
||||
- dupl
|
||||
- lll
|
||||
- gosec
|
||||
- revive
|
||||
# Exclude errcheck for tests directory
|
||||
- path: "^tests/"
|
||||
linters:
|
||||
- errcheck
|
||||
# approve insecureSkipVerify in test files
|
||||
- path: _test\.go
|
||||
text: "insecureSkipVerify"
|
||||
linters:
|
||||
- gosec
|
||||
# Exclude gosec G204 for tests and tools via source match
|
||||
- source: "exec\\.CommandContext"
|
||||
path: "(tests|tools)/"
|
||||
linters:
|
||||
- gosec
|
||||
# Exclude revive for api package naming via source match
|
||||
- source: "^package api$"
|
||||
linters:
|
||||
- revive
|
||||
max-issues-per-linter: 0
|
||||
max-same-issues: 0
|
||||
severity:
|
||||
default-severity: error
|
||||
default: error
|
||||
|
|
|
|||
|
|
@ -239,15 +239,16 @@ cp .env.example .env.local
|
|||
```
|
||||
|
||||
Key variables for development:
|
||||
- `REDIS_URL`: Redis connection string
|
||||
- `LOG_LEVEL`: Set to `debug` for verbose logging
|
||||
- `API_PORT`: API server port (default: 9101)
|
||||
|
||||
### Configuration Files
|
||||
|
||||
- `configs/config-dev.yaml`: Development configuration
|
||||
- `configs/config-local.yaml`: Local overrides
|
||||
- `configs/config-prod.yaml`: Production settings
|
||||
- `configs/api/dev.yaml`: Development (Docker) API server configuration
|
||||
- `configs/api/homelab-secure.yaml`: Homelab secure API server configuration
|
||||
- `configs/api/prod.yaml`: Production API server configuration
|
||||
- `configs/workers/docker.yaml`: Docker worker configuration
|
||||
- `configs/workers/worker-prod.toml`: Production worker configuration
|
||||
|
||||
## IDE Setup
|
||||
|
||||
|
|
|
|||
89
LICENSE
89
LICENSE
|
|
@ -1,21 +1,68 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2024 Fetch ML
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
FetchML Source-Available Research & Audit License (SARAL)
|
||||
|
||||
Copyright (c) 2026 Fetch ML
|
||||
|
||||
This software is source-available for transparency and auditability. It is not
|
||||
open-source.
|
||||
|
||||
1. Definitions
|
||||
1.1 "Software" means the FetchML source code, binaries, documentation, and any
|
||||
accompanying files.
|
||||
1.2 "You" means any individual or entity exercising permissions under this
|
||||
license.
|
||||
1.3 "Commercial Use" means use of the Software (or any Derivative Works) in a
|
||||
way that is primarily intended for or directed toward commercial advantage
|
||||
or monetary compensation, including use by or for a for-profit entity in
|
||||
its business operations. Commercial Use does not include personal hobby use
|
||||
or academic/non-profit research that is not for a commercial purpose.
|
||||
1.4 "Hosted Service" means making the Software (or substantially similar
|
||||
functionality) available to third parties as a service, including via SaaS,
|
||||
managed service, hosted API, or similar offering.
|
||||
1.5 "Redistribute" means to copy, publish, sublicense, sell, rent, lease,
|
||||
transfer, or otherwise provide the Software (or any portion) to any third
|
||||
party, whether in source or binary form.
|
||||
1.6 "Derivative Works" means any modified version of the Software, or any work
|
||||
based on the Software.
|
||||
|
||||
2. Grant of Transparency (Source Viewing and Audit)
|
||||
You may view, read, and audit the source code of the Software for purposes of
|
||||
security review, privacy review, correctness validation, reproducibility of
|
||||
results, and internal evaluation.
|
||||
|
||||
3. Permitted Use
|
||||
Subject to the restrictions in this license, You may use the Software solely
|
||||
for:
|
||||
- personal use; and
|
||||
- non-commercial research, development, and experimentation on systems that You
|
||||
own or control (including homelabs and lab servers).
|
||||
|
||||
You may modify the Software solely for Your internal Permitted Use.
|
||||
|
||||
4. Prohibited Use
|
||||
You may not:
|
||||
- use the Software for Commercial Use;
|
||||
- offer the Software or Derivative Works as a Hosted Service;
|
||||
- Redistribute the Software or any Derivative Works, in source or binary form;
|
||||
- use the Software in a way that enables third parties to access or benefit
|
||||
from the Software as a service;
|
||||
- remove or alter this license.
|
||||
|
||||
5. No Trademark Rights
|
||||
This license does not grant any rights to use the names, logos, or trademarks
|
||||
of Fetch ML or FetchML, except as necessary to comply with attribution
|
||||
requirements in this license.
|
||||
|
||||
6. No Warranty
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
|
||||
7. Limitation of Liability
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
|
||||
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
|
||||
OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
8. Termination
|
||||
Any violation of this license automatically terminates Your rights under this
|
||||
license.
|
||||
|
|
|
|||
230
Makefile
230
Makefile
|
|
@ -1,88 +1,106 @@
|
|||
.PHONY: all build prod dev clean clean-docs test test-unit test-integration test-e2e test-coverage lint install setup validate configlint ci-local docs benchmark benchmark-local artifacts clean-benchmarks clean-all clean-aggressive status load-test chaos-test profile-tools detect-regressions tech-excellence docker-build docker-run docker-stop docker-logs monitoring-performance monitoring-performance-stop dashboard-performance self-cleanup auto-cleanup test-full test-auth test-status
|
||||
|
||||
.PHONY: all build prod dev clean clean-docs test test-unit test-integration test-e2e test-coverage lint install configlint worker-configlint ci-local docs docs-setup docs-build benchmark benchmark-local artifacts clean-benchmarks clean-all clean-aggressive status size load-test chaos-test profile-load profile-load-norate profile-ws-queue profile-tools detect-regressions tech-excellence docker-build dev-smoke self-cleanup test-full test-auth deploy-up deploy-down deploy-status deploy-clean dev-up dev-down dev-status dev-logs prod-up prod-down prod-status prod-logs
|
||||
OK = ✓
|
||||
# Default target
|
||||
all: build
|
||||
|
||||
# Build all components
|
||||
# Build all components (Go binaries + optimized CLI)
|
||||
build:
|
||||
go build -o bin/api-server cmd/api-server/main.go
|
||||
go build -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go
|
||||
go build -o bin/worker cmd/worker/worker_server.go
|
||||
go build -o bin/tui ./cmd/tui
|
||||
cd cli && zig build prod
|
||||
@echo "✓ All components built"
|
||||
cd cli && zig build --release=small
|
||||
@echo "${OK} All components built"
|
||||
|
||||
# Build production-optimized binaries
|
||||
prod:
|
||||
go build -ldflags="-s -w" -o bin/api-server cmd/api-server/main.go
|
||||
go build -ldflags="-s -w" -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go
|
||||
go build -ldflags="-s -w" -o bin/worker cmd/worker/worker_server.go
|
||||
go build -ldflags="-s -w" -o bin/tui ./cmd/tui
|
||||
cd cli && zig build prod && strip zig-out/prod/ml
|
||||
@echo "✓ Production binaries built"
|
||||
cd cli && zig build --release=small
|
||||
@echo "${OK} Production binaries built"
|
||||
|
||||
cross-platform:
|
||||
@rm -rf dist
|
||||
@mkdir -p dist
|
||||
@set -e; \
|
||||
LDFLAGS='-s -w -buildid='; \
|
||||
for target in linux/amd64 linux/arm64 darwin/amd64 darwin/arm64 windows/amd64; do \
|
||||
goos=$${target%/*}; \
|
||||
goarch=$${target#*/}; \
|
||||
ext=''; \
|
||||
if [ "$$goos" = "windows" ]; then ext='.exe'; fi; \
|
||||
echo "Building $$goos/$$goarch..."; \
|
||||
CGO_ENABLED=0 GOOS=$$goos GOARCH=$$goarch go build -trimpath -buildvcs=false -ldflags="$$LDFLAGS" -o dist/fetch_ml_api-server_$${goos}_$${goarch}$${ext} cmd/api-server/main.go; \
|
||||
CGO_ENABLED=0 GOOS=$$goos GOARCH=$$goarch go build -trimpath -buildvcs=false -ldflags="$$LDFLAGS" -o dist/fetch_ml_worker_$${goos}_$${goarch}$${ext} cmd/worker/worker_server.go; \
|
||||
CGO_ENABLED=0 GOOS=$$goos GOARCH=$$goarch go build -trimpath -buildvcs=false -ldflags="$$LDFLAGS" -o dist/fetch_ml_tui_$${goos}_$${goarch}$${ext} ./cmd/tui; \
|
||||
CGO_ENABLED=0 GOOS=$$goos GOARCH=$$goarch go build -trimpath -buildvcs=false -ldflags="$$LDFLAGS" -o dist/fetch_ml_data_manager_$${goos}_$${goarch}$${ext} ./cmd/data_manager; \
|
||||
CGO_ENABLED=0 GOOS=$$goos GOARCH=$$goarch go build -trimpath -buildvcs=false -ldflags="$$LDFLAGS" -o dist/fetch_ml_user_manager_$${goos}_$${goarch}$${ext} ./cmd/user_manager; \
|
||||
done
|
||||
@echo "${OK} Cross-platform binaries built in dist/"
|
||||
|
||||
# Development build (faster compilation)
|
||||
dev:
|
||||
go build -o bin/api-server cmd/api-server/main.go
|
||||
go build -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go
|
||||
go build -o bin/tui ./cmd/tui
|
||||
cd cli && zig build dev
|
||||
@echo "✓ Development binaries built"
|
||||
go build -buildvcs=false -o bin/api-server cmd/api-server/main.go
|
||||
go build -buildvcs=false -o bin/worker cmd/worker/worker_server.go
|
||||
go build -buildvcs=false -o bin/tui ./cmd/tui
|
||||
cd cli && zig build --release=fast
|
||||
@echo "${OK} Development binaries built"
|
||||
|
||||
# Clean build artifacts
|
||||
# Clean build artifacts (Go + Zig + test outputs)
|
||||
clean:
|
||||
rm -rf bin/ coverage/
|
||||
rm -rf cli/zig-out/
|
||||
rm -rf cli/.zig-cache/
|
||||
rm -rf bin/ coverage/ tests/bin/
|
||||
rm -rf cli/zig-out/ cli/.zig-cache/ .zig-cache/
|
||||
go clean
|
||||
@echo "✓ Cleaned"
|
||||
@echo "${OK} Cleaned"
|
||||
|
||||
clean-docs:
|
||||
rm -rf docs/_site/
|
||||
@echo "✓ Cleaned docs"
|
||||
@echo "${OK} Cleaned docs"
|
||||
|
||||
# Run tests
|
||||
test:
|
||||
go test ./tests/...
|
||||
cd cli && zig build test
|
||||
@echo "✓ All tests passed"
|
||||
@echo "${OK} All tests passed"
|
||||
|
||||
# Lint Go and Zig code
|
||||
lint:
|
||||
gofmt -w ./cmd ./internal ./tests || true
|
||||
go vet ./...
|
||||
cd cli && zig fmt .
|
||||
@echo "✓ Lint completed"
|
||||
@echo "${OK} Lint completed"
|
||||
|
||||
# Install to system (requires sudo)
|
||||
install: prod
|
||||
sudo cp bin/api-server /usr/local/bin/fetchml-api
|
||||
sudo cp bin/worker /usr/local/bin/fetchml-worker
|
||||
sudo cp bin/tui /usr/local/bin/fetchml-tui
|
||||
sudo cp cli/zig-out/prod/ml /usr/local/bin/ml
|
||||
@echo "✓ Installed"
|
||||
|
||||
# Setup production environment
|
||||
setup:
|
||||
@if [ "$(shell uname)" = "Linux" ]; then \
|
||||
sudo ./scripts/setup-prod.sh; \
|
||||
else \
|
||||
echo "Production setup is for Linux only. You're on $(shell uname)."; \
|
||||
echo "Use docker-compose for local development."; \
|
||||
fi
|
||||
|
||||
# Validate production configuration
|
||||
validate:
|
||||
./scripts/validate-prod-config.sh configs/config-prod.yaml configs/worker-prod.toml
|
||||
sudo cp cli/zig-out/bin/ml /usr/local/bin/ml
|
||||
@echo "${OK} Installed"
|
||||
|
||||
# Validate YAML configs against JSON schema
|
||||
configlint:
|
||||
go run ./cmd/configlint --schema configs/schema/config_schema.yaml \
|
||||
configs/config-prod.yaml \
|
||||
configs/config-no-tls.yaml \
|
||||
configs/config-dev.yaml
|
||||
go run ./cmd/configlint --schema configs/schema/api_server_config.yaml \
|
||||
configs/api/dev.yaml \
|
||||
configs/api/homelab-secure.yaml \
|
||||
configs/api/multi-user.yaml \
|
||||
configs/api/prod.yaml
|
||||
|
||||
worker-configlint:
|
||||
go run ./cmd/configlint --schema configs/schema/worker_config_schema.yaml \
|
||||
configs/worker-prod.toml
|
||||
configs/workers/worker-prod.toml \
|
||||
configs/workers/docker.yaml \
|
||||
configs/workers/docker-dev.yaml \
|
||||
configs/workers/docker-prod.yaml \
|
||||
configs/workers/homelab-secure.yaml
|
||||
|
||||
dev-smoke:
|
||||
bash ./scripts/smoke-test.sh dev
|
||||
@echo "dev smoke: OK"
|
||||
|
||||
prod-smoke:
|
||||
bash ./scripts/smoke-test.sh prod
|
||||
@echo "prod smoke: OK"
|
||||
|
||||
# Run a local approximation of the CI pipeline
|
||||
ci-local:
|
||||
|
|
@ -95,36 +113,19 @@ ci-local:
|
|||
@echo "Running coverage..."
|
||||
make test-coverage
|
||||
|
||||
# Docker targets
|
||||
# Docker image build (no direct docker-compose run; use deploy-* targets instead)
|
||||
docker-build:
|
||||
docker build -f build/docker/simple.Dockerfile -t fetchml:latest .
|
||||
@echo "✓ Docker image built"
|
||||
|
||||
docker-run:
|
||||
docker-compose up -d
|
||||
@echo "✓ Services started"
|
||||
|
||||
docker-stop:
|
||||
docker-compose down
|
||||
@echo "✓ Services stopped"
|
||||
|
||||
docker-logs:
|
||||
docker-compose logs -f
|
||||
|
||||
# Monitoring setup (Linux only)
|
||||
setup-monitoring:
|
||||
@if [ "$(shell uname)" = "Linux" ]; then \
|
||||
sudo ./scripts/setup-monitoring-prod.sh; \
|
||||
else \
|
||||
echo "Monitoring setup is for Linux production. Use docker-compose for local development."; \
|
||||
fi
|
||||
@echo "${OK} Docker image built"
|
||||
|
||||
# Enhanced test targets
|
||||
test-unit:
|
||||
go test -v -short ./...
|
||||
go test -v -short ./tests/unit/...
|
||||
cd cli && zig build test
|
||||
|
||||
test-integration:
|
||||
go test -v ./...
|
||||
go test -v ./tests/integration/... ./tests
|
||||
cd cli && zig build test
|
||||
|
||||
test-e2e:
|
||||
go test -v ./tests/e2e/...
|
||||
|
|
@ -132,7 +133,7 @@ test-e2e:
|
|||
test-coverage:
|
||||
go test -coverprofile=coverage/coverage.out ./...
|
||||
go tool cover -html=coverage/coverage.out -o coverage/coverage.html
|
||||
@echo "✓ Coverage report: coverage/coverage.html"
|
||||
@echo "${OK} Coverage report: coverage/coverage.html"
|
||||
|
||||
# Documentation setup
|
||||
docs-setup:
|
||||
|
|
@ -170,7 +171,7 @@ benchmark:
|
|||
# Run benchmarks locally with artifact management
|
||||
benchmark-local:
|
||||
@echo "Running benchmarks locally with full workflow..."
|
||||
./scripts/run-benchmarks-local.sh
|
||||
./scripts/benchmarks/run-benchmarks-local.sh
|
||||
|
||||
# Manage benchmark artifacts
|
||||
artifacts:
|
||||
|
|
@ -180,41 +181,27 @@ artifacts:
|
|||
# Clean benchmark artifacts (keep last 10)
|
||||
clean-benchmarks:
|
||||
@echo "Cleaning benchmark artifacts..."
|
||||
./scripts/cleanup-benchmarks.sh benchmarks
|
||||
./scripts/maintenance/cleanup-benchmarks.sh benchmarks
|
||||
|
||||
# Comprehensive cleanup (keep last 5 runs)
|
||||
clean-all:
|
||||
@echo "Running comprehensive cleanup..."
|
||||
./scripts/cleanup-benchmarks.sh all
|
||||
./scripts/maintenance/cleanup-benchmarks.sh all
|
||||
|
||||
# Aggressive cleanup (removes more data)
|
||||
clean-aggressive:
|
||||
@echo "Running aggressive cleanup..."
|
||||
./scripts/cleanup-benchmarks.sh aggressive
|
||||
./scripts/maintenance/cleanup-benchmarks.sh aggressive
|
||||
|
||||
# Show disk usage status
|
||||
status:
|
||||
@echo "Checking disk usage..."
|
||||
./scripts/cleanup-benchmarks.sh status
|
||||
./scripts/maintenance/cleanup-benchmarks.sh status
|
||||
|
||||
# Start performance monitoring stack
|
||||
monitoring-performance:
|
||||
@echo "Starting performance monitoring stack..."
|
||||
cd monitoring && docker-compose -f docker-compose.performance.yml up -d
|
||||
@echo "Grafana available at: http://localhost:3001 (admin/admin)"
|
||||
@echo "Loki available at: http://localhost:3100"
|
||||
@echo "Pushgateway available at: http://localhost:9091"
|
||||
@echo "Quick start guide: docs/src/performance-quick-start.md"
|
||||
size:
|
||||
@echo "Binary sizes:"
|
||||
@ls -lh bin/* cli/zig-out/bin/ml 2>/dev/null || true
|
||||
|
||||
# Stop performance monitoring stack
|
||||
monitoring-performance-stop:
|
||||
@echo "Stopping performance monitoring stack..."
|
||||
cd monitoring && docker-compose -f docker-compose.performance.yml down
|
||||
|
||||
# View performance dashboard
|
||||
dashboard-performance:
|
||||
@echo "Opening performance dashboard..."
|
||||
@echo "URL: http://localhost:3001/d/fetchml-performance/fetch-ml-performance-dashboard"
|
||||
|
||||
# Load testing
|
||||
load-test:
|
||||
|
|
@ -225,18 +212,18 @@ load-test:
|
|||
profile-load:
|
||||
@echo "CPU profiling MediumLoad HTTP load test..."
|
||||
go test ./tests/load -run TestLoadProfile_Medium -count=1 -cpuprofile tests/bin/cpu_load.out
|
||||
@echo "✓ CPU profile written to cpu_load.out (inspect with: go tool pprof tests/bin/cpu_load.out)"
|
||||
@echo "${OK} CPU profile written to cpu_load.out (inspect with: go tool pprof tests/bin/cpu_load.out)"
|
||||
|
||||
profile-load-norate:
|
||||
@echo "CPU profiling MediumLoad HTTP load test (no rate limiting)..."
|
||||
go test ./tests/load -run TestLoadProfile_Medium -count=1 -cpuprofile tests/bin/cpu_load.out -v -args -profile-norate
|
||||
@echo "✓ CPU profile written to cpu_load.out (inspect with: go tool pprof tests/bin/cpu_load.out)"
|
||||
@echo "${OK} CPU profile written to cpu_load.out (inspect with: go tool pprof tests/bin/cpu_load.out)"
|
||||
|
||||
# CPU profiling for WebSocket → Redis queue → worker path
|
||||
profile-ws-queue:
|
||||
@echo "CPU profiling WebSocket queue integration test..."
|
||||
go test ./tests/integration -run WebSocketQueue -count=5 -cpuprofile tests/bin/cpu_ws.out
|
||||
@echo "✓ CPU profile written to cpu_ws.out (inspect with: go tool pprof tests/bin/cpu_ws.out)"
|
||||
@echo "${OK} CPU profile written to cpu_ws.out (inspect with: go tool pprof tests/bin/cpu_ws.out)"
|
||||
|
||||
# Chaos engineering tests
|
||||
chaos-test:
|
||||
|
|
@ -282,24 +269,20 @@ help:
|
|||
@echo ""
|
||||
@echo "Docker Targets:"
|
||||
@echo " make docker-build - Build Docker image"
|
||||
@echo " make docker-run - Start services with docker-compose"
|
||||
@echo " make docker-stop - Stop docker-compose services"
|
||||
@echo " make docker-logs - View docker-compose logs"
|
||||
@echo ""
|
||||
@echo "Test Targets:"
|
||||
@echo " make test - Run all tests"
|
||||
@echo " make test-unit - Run unit tests only"
|
||||
@echo " make test-integration - Run integration tests"
|
||||
@echo " make test-e2e - Run end-to-end tests (Podman test is opt-in via FETCH_ML_E2E_PODMAN=1)"
|
||||
@echo " make test-coverage - Generate coverage report"
|
||||
@echo " make lint - Run formatters and linters"
|
||||
@echo " make ci-local - Run local CI dry-run (tests, lint, config validation, coverage)"
|
||||
@echo " make configlint - Validate YAML configs against schema"
|
||||
@echo " make worker-configlint - Validate worker configs against schema"
|
||||
@echo ""
|
||||
@echo "Setup Targets:"
|
||||
@echo " make install - Install binaries to /usr/local/bin (requires sudo)"
|
||||
@echo " make setup - Run production setup (Linux only)"
|
||||
@echo " make setup-monitoring - Setup monitoring stack (Linux only)"
|
||||
@echo " make validate - Validate production configuration"
|
||||
@echo ""
|
||||
@echo "Performance Testing:"
|
||||
@echo " make benchmark - Run performance benchmarks"
|
||||
|
|
@ -325,10 +308,8 @@ help:
|
|||
@echo "Utility:"
|
||||
@echo " make size - Show binary sizes"
|
||||
@echo " make self-cleanup - Clean up Docker resources"
|
||||
@echo " make auto-cleanup - Setup daily auto-cleanup service"
|
||||
@echo " make test-full - Run complete test suite"
|
||||
@echo " make test-auth - Test multi-user authentication"
|
||||
@echo " make test-status - Check cleanup status"
|
||||
@echo " make help - Show this help"
|
||||
|
||||
# Self-cleaning for Docker resources
|
||||
|
|
@ -336,15 +317,10 @@ self-cleanup:
|
|||
@echo "Running self-cleanup..."
|
||||
@./scripts/maintenance/cleanup.sh
|
||||
|
||||
# Setup auto-cleanup service
|
||||
auto-cleanup:
|
||||
@echo "Setting up auto-cleanup service..."
|
||||
@./scripts/deployment/setup-auto-cleanup.sh
|
||||
|
||||
# Run full test suite
|
||||
test-full:
|
||||
@echo "Running full test suite..."
|
||||
@./scripts/testing/run-full-test-suite.sh
|
||||
@$(MAKE) ci-local
|
||||
|
||||
# Quick authentication test
|
||||
test-auth:
|
||||
|
|
@ -353,7 +329,43 @@ test-auth:
|
|||
@echo "Testing researcher user..." && cp ~/.ml/config-researcher.toml ~/.ml/config.toml && ./cli/zig-out/bin/ml status
|
||||
@echo "Testing analyst user..." && cp ~/.ml/config-analyst.toml ~/.ml/config.toml && ./cli/zig-out/bin/ml status
|
||||
|
||||
# Test cleanup status
|
||||
test-status:
|
||||
@echo "Checking cleanup status..."
|
||||
@./scripts/maintenance/cleanup-status.sh
|
||||
# Deployment management (using organized docker-compose files)
|
||||
deploy-up:
|
||||
@echo "Starting development environment..."
|
||||
@./deployments/deploy.sh dev up
|
||||
|
||||
deploy-down:
|
||||
@echo "Stopping development environment..."
|
||||
@./deployments/deploy.sh dev down
|
||||
|
||||
deploy-status:
|
||||
@echo "Checking deployment status..."
|
||||
@./deployments/deploy.sh dev status
|
||||
|
||||
deploy-clean:
|
||||
@echo "Cleaning all deployments..."
|
||||
@cd deployments && make clean
|
||||
|
||||
dev-up:
|
||||
@./deployments/deploy.sh dev up
|
||||
|
||||
dev-down:
|
||||
@./deployments/deploy.sh dev down
|
||||
|
||||
dev-status:
|
||||
@./deployments/deploy.sh dev status
|
||||
|
||||
dev-logs:
|
||||
@./deployments/deploy.sh dev logs
|
||||
|
||||
prod-up:
|
||||
@./deployments/deploy.sh prod up
|
||||
|
||||
prod-down:
|
||||
@./deployments/deploy.sh prod down
|
||||
|
||||
prod-status:
|
||||
@./deployments/deploy.sh prod status
|
||||
|
||||
prod-logs:
|
||||
@./deployments/deploy.sh prod logs
|
||||
|
|
|
|||
100
README.md
100
README.md
|
|
@ -2,6 +2,66 @@
|
|||
|
||||
A lightweight ML experiment platform with a tiny Zig CLI and a Go backend. Designed for homelabs and small teams.
|
||||
|
||||
## Installation (recommended)
|
||||
|
||||
FetchML publishes pre-built release artifacts (CLI + Go services) on GitHub Releases.
|
||||
|
||||
If you prefer a one-shot check (recommended for most users), you can use:
|
||||
|
||||
```bash
|
||||
./scripts/verify_release.sh --dir . --repo <org>/<repo>
|
||||
```
|
||||
|
||||
1) Download the right archive for your platform
|
||||
|
||||
2) Verify `checksums.txt` signature (recommended)
|
||||
|
||||
The release includes a signed `checksums.txt` plus:
|
||||
|
||||
- `checksums.txt.sig`
|
||||
- `checksums.txt.cert`
|
||||
|
||||
Verify the signature (keyless Sigstore) using cosign:
|
||||
|
||||
```bash
|
||||
cosign verify-blob \
|
||||
--certificate checksums.txt.cert \
|
||||
--signature checksums.txt.sig \
|
||||
--certificate-identity-regexp "^https://github.com/<org>/<repo>/.github/workflows/release.yml@refs/tags/v.*$" \
|
||||
--certificate-oidc-issuer https://token.actions.githubusercontent.com \
|
||||
checksums.txt
|
||||
```
|
||||
|
||||
3) Verify the SHA256 checksum against `checksums.txt`
|
||||
|
||||
4) Extract and install
|
||||
|
||||
Example (CLI on Linux x86_64):
|
||||
|
||||
```bash
|
||||
# Download
|
||||
curl -fsSLO https://github.com/<org>/<repo>/releases/download/<tag>/ml-linux-x86_64.tar.gz
|
||||
curl -fsSLO https://github.com/<org>/<repo>/releases/download/<tag>/checksums.txt
|
||||
curl -fsSLO https://github.com/<org>/<repo>/releases/download/<tag>/checksums.txt.sig
|
||||
curl -fsSLO https://github.com/<org>/<repo>/releases/download/<tag>/checksums.txt.cert
|
||||
|
||||
# Verify
|
||||
cosign verify-blob \
|
||||
--certificate checksums.txt.cert \
|
||||
--signature checksums.txt.sig \
|
||||
--certificate-identity-regexp "^https://github.com/<org>/<repo>/.github/workflows/release.yml@refs/tags/v.*$" \
|
||||
--certificate-oidc-issuer https://token.actions.githubusercontent.com \
|
||||
checksums.txt
|
||||
sha256sum -c --ignore-missing checksums.txt
|
||||
|
||||
# Install
|
||||
tar -xzf ml-linux-x86_64.tar.gz
|
||||
chmod +x ml-linux-x86_64
|
||||
sudo mv ml-linux-x86_64 /usr/local/bin/ml
|
||||
|
||||
ml --help
|
||||
```
|
||||
|
||||
## Quick start
|
||||
|
||||
```bash
|
||||
|
|
@ -12,7 +72,7 @@ docker-compose up -d
|
|||
|
||||
# Or build the CLI locally
|
||||
cd cli && make all
|
||||
./build/ml --help
|
||||
./zig-out/bin/ml --help
|
||||
```
|
||||
|
||||
## What you get
|
||||
|
|
@ -42,6 +102,17 @@ ml dataset list
|
|||
ml monitor # SSH to run TUI remotely
|
||||
```
|
||||
|
||||
## Phase 1 (V1) notes
|
||||
|
||||
- **Task schema** supports optional `snapshot_id` (opaque identifier) and `dataset_specs` (structured dataset inputs). If `dataset_specs` is present it takes precedence over legacy `datasets` / `--datasets` args.
|
||||
- **Snapshot restore (S1)** stages verified `snapshot_id` into each task workspace and exposes it via `FETCH_ML_SNAPSHOT_DIR` and `FETCH_ML_SNAPSHOT_ID`. If `snapshot_store.enabled: true` in the worker config, the worker will pull `<prefix>/<snapshot_id>.tar.gz` from an S3-compatible store (e.g. MinIO), verify `snapshot_sha256`, and cache it under `data_dir/snapshots/sha256/<snapshot_sha256>`.
|
||||
- **Prewarm (best-effort)** can fetch datasets for the next queued task while another task is running. Prewarm state is surfaced in `ml status --json` under the optional `prewarm` field.
|
||||
- **Env prewarm (best-effort)** can build a warmed Podman image keyed by `deps_manifest_sha256` and reuse it for later tasks.
|
||||
|
||||
## Changelog
|
||||
|
||||
See `CHANGELOG.md`.
|
||||
|
||||
## Build
|
||||
|
||||
```bash
|
||||
|
|
@ -66,6 +137,31 @@ See `docs/` for detailed guides:
|
|||
- `docs/src/quick-start.md` – Full setup guide
|
||||
- `docs/src/deployment.md` – Production deployment
|
||||
|
||||
## Source code
|
||||
|
||||
The FetchML source code is intentionally not hosted on GitHub.
|
||||
|
||||
The canonical source repository is available at: `<SOURCE_REPO_URL>`.
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome.
|
||||
|
||||
- **Questions / bug reports**: Use GitHub Issues: `<GITHUB_ISSUES_URL>`. Include:
|
||||
- how to reproduce
|
||||
- expected vs actual behavior
|
||||
- logs/config snippets (sanitize secrets)
|
||||
- OS + versions (Go, Zig, Podman/Docker if relevant)
|
||||
- **Changes**: Submit a patch in a GitHub issue.
|
||||
- Create a topic branch.
|
||||
- Run tests/linters.
|
||||
- Export your change as either:
|
||||
- a patch series: `git format-patch -N origin/main`, or
|
||||
- a single bundle: `git bundle create fetchml.bundle origin/main..HEAD`
|
||||
- Attach the generated files to a GitHub issue at `<GITHUB_ISSUES_URL>`.
|
||||
|
||||
## License
|
||||
|
||||
See LICENSE.
|
||||
FetchML is source-available for transparency and auditability. It is not open-source.
|
||||
|
||||
See `LICENSE`.
|
||||
|
|
|
|||
10
SECURITY.md
10
SECURITY.md
|
|
@ -27,7 +27,7 @@ This will:
|
|||
- **TLS/SSL**: HTTPS encrypted communication
|
||||
- **IP Whitelisting**: Restrict access to trusted networks
|
||||
- **Rate Limiting**: Prevent abuse and DoS attacks
|
||||
- **Reverse Proxy**: Nginx with security headers
|
||||
- **Reverse Proxy**: Caddy with security headers
|
||||
|
||||
### Data Protection
|
||||
- **Path Traversal Protection**: Prevents directory escape attacks
|
||||
|
|
@ -37,7 +37,7 @@ This will:
|
|||
## Configuration Files
|
||||
|
||||
### Secure Config Location
|
||||
- `configs/environments/config-homelab-secure.yaml` - Main secure configuration
|
||||
- `configs/api/homelab-secure.yaml` - Main secure configuration
|
||||
|
||||
### API Keys
|
||||
- `.api-keys` - Generated API keys (600 permissions)
|
||||
|
|
@ -71,7 +71,7 @@ docker-compose -f docker-compose.yml -f docker-compose.homelab-secure.yml up -d
|
|||
source .env.secure
|
||||
|
||||
# Start server
|
||||
./api-server -config configs/environments/config-homelab-secure.yaml
|
||||
./api-server -config configs/api/homelab-secure.yaml
|
||||
```
|
||||
|
||||
## Security Checklist
|
||||
|
|
@ -87,7 +87,7 @@ source .env.secure
|
|||
### Network Security
|
||||
- [ ] Use HTTPS only (disable HTTP)
|
||||
- [ ] Restrict API access to trusted IPs
|
||||
- [ ] Use reverse proxy (nginx)
|
||||
- [ ] Use reverse proxy (caddy)
|
||||
- [ ] Enable security headers
|
||||
- [ ] Monitor access logs
|
||||
|
||||
|
|
@ -174,7 +174,7 @@ curl -s https://api.ipify.org
|
|||
|
||||
Monitor these files:
|
||||
- `logs/fetch_ml.log` - Application logs
|
||||
- `/var/log/nginx/security.log` - Nginx access logs
|
||||
- Caddy access logs (configure if enabled)
|
||||
- Docker logs: `docker logs ml-experiments-api`
|
||||
|
||||
## Best Practices
|
||||
|
|
|
|||
|
|
@ -53,19 +53,23 @@ WORKDIR /app
|
|||
COPY --from=go-builder /app/bin/ /usr/local/bin/
|
||||
COPY --from=zig-builder /app/cli/zig-out/bin/ml /usr/local/bin/
|
||||
|
||||
# Copy configs
|
||||
COPY --from=go-builder /app/configs/ /app/configs/
|
||||
|
||||
# Create directories
|
||||
RUN mkdir -p /data/ml-experiments /home/appuser/.ml && \
|
||||
chown -R appuser:appgroup /data /home/appuser
|
||||
RUN mkdir -p /data/experiments /data/datasets /data/snapshots /home/appuser/.ml && \
|
||||
mkdir -p /app/data/experiments /app/data/datasets /app/data/snapshots /app/logs /app/ssl && \
|
||||
chown -R appuser:appgroup /data /app /home/appuser
|
||||
|
||||
# Switch to app user
|
||||
USER appuser
|
||||
|
||||
# Expose ports
|
||||
EXPOSE 9100 9101
|
||||
EXPOSE 9101
|
||||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD wget --no-verbose --tries=1 --spider http://localhost:9100/health || exit 1
|
||||
CMD wget --no-verbose --tries=1 --no-check-certificate --spider https://localhost:9101/health || exit 1
|
||||
|
||||
# Default command
|
||||
CMD ["/usr/local/bin/api-server"]
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/api/dev.yaml"]
|
||||
|
|
|
|||
|
|
@ -40,12 +40,15 @@ COPY --from=builder /app/bin/ /usr/local/bin/
|
|||
COPY --from=builder /app/configs/ /app/configs/
|
||||
|
||||
# Create necessary directories
|
||||
RUN mkdir -p /app/data/experiments /app/logs /app/ssl /app/ssh /tmp/fetchml-jobs
|
||||
RUN mkdir -p /app/data/experiments /app/data/datasets /app/data/snapshots /app/logs /app/ssl /app/ssh /tmp/fetchml-jobs && \
|
||||
mkdir -p /data/active/datasets /data/active/snapshots && \
|
||||
mkdir -p /logs && \
|
||||
chown -R appuser:appgroup /app /data /logs
|
||||
|
||||
# Generate SSL certificates
|
||||
RUN openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
|
||||
-subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
|
||||
chmod 644 /app/ssl/cert.pem /app/ssl/key.pem
|
||||
chmod 644 /app/ssl/cert.pem && chmod 600 /app/ssl/key.pem
|
||||
|
||||
# Generate SSH keys for container communication
|
||||
RUN ssh-keygen -t rsa -b 2048 -f /app/ssh/id_rsa -N "" && \
|
||||
|
|
@ -70,4 +73,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
|||
CMD curl -k -f https://localhost:9101/health || exit 1
|
||||
|
||||
# Default command for API server
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/api/prod.yaml"]
|
||||
|
|
|
|||
|
|
@ -55,8 +55,10 @@ COPY --from=builder /app/bin/ /usr/local/bin/
|
|||
COPY --from=builder /app/configs/ /app/configs/
|
||||
|
||||
# Create necessary directories with proper permissions
|
||||
RUN mkdir -p /app/data/experiments /app/logs /app/ssl /tmp/fetchml-jobs && \
|
||||
chown -R appuser:appgroup /app && \
|
||||
RUN mkdir -p /app/data/experiments /app/data/datasets /app/data/snapshots /app/logs /app/ssl /tmp/fetchml-jobs && \
|
||||
mkdir -p /data/active/datasets /data/active/snapshots && \
|
||||
mkdir -p /logs && \
|
||||
chown -R appuser:appgroup /app /data /logs && \
|
||||
chmod 750 /app/data/experiments /app/logs
|
||||
|
||||
# Generate SSL certificates with stronger crypto
|
||||
|
|
@ -144,4 +146,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
|||
CMD curl -k -f https://localhost:9101/health || exit 1
|
||||
|
||||
# Default command for API server
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/api/prod.yaml"]
|
||||
|
|
|
|||
|
|
@ -45,13 +45,16 @@ COPY --from=builder /app/bin/ /usr/local/bin/
|
|||
COPY --from=builder /app/configs/ /app/configs/
|
||||
|
||||
# Create necessary directories
|
||||
RUN mkdir -p /app/data/experiments /app/logs /app/ssl /tmp/fetchml-jobs && \
|
||||
chown -R appuser:appgroup /app
|
||||
RUN mkdir -p /app/data/experiments /app/data/datasets /app/data/snapshots /app/logs /app/ssl /tmp/fetchml-jobs && \
|
||||
mkdir -p /data/active/datasets /data/active/snapshots && \
|
||||
mkdir -p /logs && \
|
||||
chown -R appuser:appgroup /app /data /logs
|
||||
|
||||
# Generate SSL certificates
|
||||
RUN openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
|
||||
-subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
|
||||
chmod 644 /app/ssl/cert.pem /app/ssl/key.pem
|
||||
chmod 644 /app/ssl/cert.pem && chmod 600 /app/ssl/key.pem && \
|
||||
chown -R appuser:appgroup /app/ssl
|
||||
|
||||
# Generate SSH keys for worker user
|
||||
RUN ssh-keygen -t rsa -b 4096 -f /home/worker/.ssh/id_rsa -N "" && \
|
||||
|
|
@ -99,4 +102,4 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
|||
CMD curl -k -f https://localhost:9101/health || exit 1
|
||||
|
||||
# Default command for API server
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/api/prod.yaml"]
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
# Install dependencies
|
||||
RUN apk add --no-cache git make
|
||||
RUN apk add --no-cache git make gcc musl-dev
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
|
@ -17,13 +17,14 @@ RUN go mod download
|
|||
COPY . .
|
||||
|
||||
# Build Go binaries
|
||||
RUN go build -o bin/api-server cmd/api-server/main.go
|
||||
RUN CGO_ENABLED=1 go build -o bin/api-server cmd/api-server/main.go && \
|
||||
CGO_ENABLED=1 go build -o bin/worker ./cmd/worker
|
||||
|
||||
# Final stage
|
||||
FROM alpine:3.19
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apk add --no-cache ca-certificates redis openssl
|
||||
RUN apk add --no-cache bash ca-certificates redis openssl curl podman fuse-overlayfs slirp4netns iptables
|
||||
|
||||
# Create app user
|
||||
RUN addgroup -g 1001 -S appgroup && \
|
||||
|
|
@ -37,15 +38,17 @@ COPY --from=builder /app/bin/ /usr/local/bin/
|
|||
|
||||
# Copy configs and templates
|
||||
COPY --from=builder /app/configs/ /app/configs/
|
||||
COPY --from=builder /app/nginx/ /app/nginx/
|
||||
|
||||
# Create necessary directories
|
||||
RUN mkdir -p /app/data/experiments /app/logs /app/ssl
|
||||
RUN mkdir -p /app/data/experiments /app/data/datasets /app/data/snapshots /app/logs /app/ssl
|
||||
|
||||
# Generate SSL certificates for container use
|
||||
RUN openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
|
||||
-subj "/C=US/ST=Homelab/L=Local/O=ML/OU=Experiments/CN=localhost" && \
|
||||
chmod 644 /app/ssl/cert.pem /app/ssl/key.pem
|
||||
chmod 644 /app/ssl/cert.pem && chmod 600 /app/ssl/key.pem
|
||||
|
||||
# Ensure app user can write to data/logs and read TLS material
|
||||
RUN chown -R appuser:appgroup /app/data /app/logs /app/ssl /app/configs
|
||||
|
||||
# Switch to app user
|
||||
USER appuser
|
||||
|
|
@ -55,7 +58,7 @@ EXPOSE 9101
|
|||
|
||||
# Health check
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
||||
CMD curl -k -f https://localhost:9101/health || exit 1
|
||||
CMD curl -f http://localhost:9101/health || curl -k -f https://localhost:9101/health || exit 1
|
||||
|
||||
# Default command
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/config.yaml"]
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/api/dev.yaml"]
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
# Install dependencies
|
||||
RUN apk add --no-cache git
|
||||
RUN apk add --no-cache git gcc musl-dev
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
|
@ -17,7 +17,7 @@ RUN go mod download
|
|||
COPY . .
|
||||
|
||||
# Build only Go binaries (skip Zig)
|
||||
RUN go build -o bin/api-server cmd/api-server/main.go && \
|
||||
RUN CGO_ENABLED=1 go build -o bin/api-server cmd/api-server/main.go && \
|
||||
go build -o bin/worker cmd/worker/worker_server.go cmd/worker/worker_config.go && \
|
||||
go build -o bin/tui ./cmd/tui
|
||||
|
||||
|
|
@ -25,7 +25,7 @@ RUN go build -o bin/api-server cmd/api-server/main.go && \
|
|||
FROM alpine:3.19
|
||||
|
||||
# Install runtime dependencies
|
||||
RUN apk add --no-cache ca-certificates curl
|
||||
RUN apk add --no-cache ca-certificates curl openssl
|
||||
|
||||
# Create app user
|
||||
RUN addgroup -g 1001 -S appgroup && \
|
||||
|
|
@ -41,7 +41,16 @@ COPY --from=builder /app/bin/ /usr/local/bin/
|
|||
COPY --from=builder /app/configs/ /app/configs/
|
||||
|
||||
# Create necessary directories
|
||||
RUN mkdir -p /app/data/experiments /app/logs
|
||||
RUN mkdir -p /app/data/experiments /app/data/datasets /app/data/snapshots /app/logs /app/ssl && \
|
||||
mkdir -p /data/experiments /data/datasets /data/snapshots
|
||||
|
||||
# Generate SSL certificates for container use
|
||||
RUN openssl req -x509 -newkey rsa:2048 -keyout /app/ssl/key.pem -out /app/ssl/cert.pem -days 365 -nodes \
|
||||
-subj "/C=US/ST=Test/L=Local/O=FetchML/OU=Tests/CN=localhost" && \
|
||||
chmod 644 /app/ssl/cert.pem && chmod 600 /app/ssl/key.pem
|
||||
|
||||
# Ensure app user can write to data/logs and read TLS material
|
||||
RUN chown -R appuser:appgroup /app/data /app/logs /app/ssl /app/configs /data
|
||||
|
||||
# Switch to app user
|
||||
USER appuser
|
||||
|
|
@ -50,4 +59,4 @@ USER appuser
|
|||
EXPOSE 9101
|
||||
|
||||
# Default command
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/environments/config-local.yaml"]
|
||||
CMD ["/usr/local/bin/api-server", "-config", "/app/configs/api/dev.yaml"]
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
|
|
@ -10,6 +11,7 @@ import (
|
|||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/jfraeys/fetch_ml/internal/fileutil"
|
||||
"github.com/xeipuuv/gojsonschema"
|
||||
"gopkg.in/yaml.v3"
|
||||
|
|
@ -68,20 +70,7 @@ func loadSchema(schemaPath string) (gojsonschema.JSONLoader, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
tmpFile, err := os.CreateTemp("", "fetchml-schema-*.json")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
_ = tmpFile.Close()
|
||||
_ = os.Remove(tmpFile.Name())
|
||||
}()
|
||||
|
||||
if _, err := tmpFile.Write(schemaJSON); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return gojsonschema.NewReferenceLoader("file://" + filepath.ToSlash(tmpFile.Name())), nil
|
||||
return gojsonschema.NewBytesLoader(schemaJSON), nil
|
||||
}
|
||||
|
||||
func validateConfig(schemaLoader gojsonschema.JSONLoader, configPath string) error {
|
||||
|
|
@ -90,12 +79,27 @@ func validateConfig(schemaLoader gojsonschema.JSONLoader, configPath string) err
|
|||
return err
|
||||
}
|
||||
|
||||
var configYAML interface{}
|
||||
if err := yaml.Unmarshal(data, &configYAML); err != nil {
|
||||
return fmt.Errorf("failed to parse YAML: %w", err)
|
||||
ext := strings.ToLower(filepath.Ext(configPath))
|
||||
var decoded any
|
||||
switch ext {
|
||||
case ".toml":
|
||||
var configTOML map[string]any
|
||||
if _, err := toml.Decode(string(data), &configTOML); err != nil {
|
||||
return fmt.Errorf("failed to parse TOML: %w", err)
|
||||
}
|
||||
decoded = configTOML
|
||||
default:
|
||||
// YAML (default)
|
||||
var configYAML any
|
||||
dec := yaml.NewDecoder(bytes.NewReader(data))
|
||||
dec.KnownFields(false)
|
||||
if err := dec.Decode(&configYAML); err != nil {
|
||||
return fmt.Errorf("failed to parse YAML: %w", err)
|
||||
}
|
||||
decoded = configYAML
|
||||
}
|
||||
|
||||
configJSON, err := json.Marshal(configYAML)
|
||||
configJSON, err := json.Marshal(decoded)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,10 +40,10 @@ type DataConfig struct {
|
|||
CleanupInterval int `yaml:"cleanup_interval_min"` // Run cleanup every X minutes
|
||||
|
||||
// Podman integration
|
||||
PodmanImage string `yaml:"podman_image"`
|
||||
ContainerWorkspace string `yaml:"container_workspace"`
|
||||
ContainerResults string `yaml:"container_results"`
|
||||
GPUAccess bool `yaml:"gpu_access"`
|
||||
PodmanImage string `yaml:"podman_image"`
|
||||
ContainerWorkspace string `yaml:"container_workspace"`
|
||||
ContainerResults string `yaml:"container_results"`
|
||||
GPUDevices []string `yaml:"gpu_devices"`
|
||||
}
|
||||
|
||||
// LoadDataConfig loads data manager configuration from a YAML file.
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import (
|
|||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
|
@ -23,6 +24,13 @@ import (
|
|||
"github.com/jfraeys/fetch_ml/internal/telemetry"
|
||||
)
|
||||
|
||||
func shellQuote(s string) string {
|
||||
if s == "" {
|
||||
return "''"
|
||||
}
|
||||
return "'" + strings.ReplaceAll(s, "'", "'\"'\"'") + "'"
|
||||
}
|
||||
|
||||
// SSHClient alias for convenience.
|
||||
type SSHClient = network.SSHClient
|
||||
|
||||
|
|
@ -37,6 +45,32 @@ type DataManager struct {
|
|||
logger *logging.Logger
|
||||
}
|
||||
|
||||
func (dm *DataManager) archiveDatasetOnML(datasetName string) (string, error) {
|
||||
datasetName = strings.TrimSpace(datasetName)
|
||||
if err := container.ValidateJobName(datasetName); err != nil {
|
||||
return "", fmt.Errorf("invalid dataset name: %w", err)
|
||||
}
|
||||
if strings.TrimSpace(dm.config.MLDataDir) == "" {
|
||||
return "", fmt.Errorf("missing ml_data_dir")
|
||||
}
|
||||
|
||||
stamp := time.Now().UTC().Format("20060102-150405")
|
||||
archiveRoot := filepath.Join(dm.config.MLDataDir, ".archive", stamp)
|
||||
src := filepath.Join(dm.config.MLDataDir, datasetName)
|
||||
dst := filepath.Join(archiveRoot, datasetName)
|
||||
|
||||
cmd := fmt.Sprintf(
|
||||
"mkdir -p %s && mv %s %s",
|
||||
shellQuote(archiveRoot),
|
||||
shellQuote(src),
|
||||
shellQuote(dst),
|
||||
)
|
||||
if _, err := dm.mlServer.Exec(cmd); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
// DataFetchRequest represents a request to fetch datasets.
|
||||
type DataFetchRequest struct {
|
||||
JobName string `json:"job_name"`
|
||||
|
|
@ -141,7 +175,11 @@ func (dm *DataManager) FetchDataset(jobName, datasetName string) error {
|
|||
})
|
||||
}
|
||||
|
||||
func (dm *DataManager) fetchDatasetInternal(ctx context.Context, jobName, datasetName string) error {
|
||||
func (dm *DataManager) fetchDatasetInternal(
|
||||
ctx context.Context,
|
||||
jobName string,
|
||||
datasetName string,
|
||||
) error {
|
||||
if err := container.ValidateJobName(datasetName); err != nil {
|
||||
return &errtypes.DataFetchError{
|
||||
Dataset: datasetName,
|
||||
|
|
@ -225,9 +263,14 @@ func (dm *DataManager) fetchDatasetInternal(ctx context.Context, jobName, datase
|
|||
|
||||
ioBefore, ioErr := telemetry.ReadProcessIO()
|
||||
start := time.Now()
|
||||
out, err := telemetry.ExecWithMetrics(dm.logger, "dataset transfer", time.Since(start), func() (string, error) {
|
||||
return dm.nasServer.ExecContext(ctx, rsyncCmd)
|
||||
})
|
||||
out, err := telemetry.ExecWithMetrics(
|
||||
dm.logger,
|
||||
"dataset transfer",
|
||||
time.Since(start),
|
||||
func() (string, error) {
|
||||
return dm.nasServer.ExecContext(ctx, rsyncCmd)
|
||||
},
|
||||
)
|
||||
duration := time.Since(start)
|
||||
|
||||
if err != nil {
|
||||
|
|
@ -413,64 +456,101 @@ func (dm *DataManager) CleanupOldData() error {
|
|||
"total_size_gb", totalSizeGB,
|
||||
"dataset_count", len(datasets))
|
||||
|
||||
// Delete datasets older than max age or if over size limit
|
||||
// Archive datasets older than max age or if over size limit
|
||||
maxAge := time.Duration(dm.config.MaxAgeHours) * time.Hour
|
||||
maxSize := int64(dm.config.MaxSizeGB) * 1024 * 1024 * 1024
|
||||
|
||||
var deleted []string
|
||||
// Ensure deterministic ordering when needing to reduce size.
|
||||
sort.Slice(datasets, func(i, j int) bool {
|
||||
ai := datasets[i].LastAccess
|
||||
aj := datasets[j].LastAccess
|
||||
if ai.IsZero() && aj.IsZero() {
|
||||
return datasets[i].Name < datasets[j].Name
|
||||
}
|
||||
if ai.IsZero() {
|
||||
return true
|
||||
}
|
||||
if aj.IsZero() {
|
||||
return false
|
||||
}
|
||||
if ai.Equal(aj) {
|
||||
return datasets[i].Name < datasets[j].Name
|
||||
}
|
||||
return ai.Before(aj)
|
||||
})
|
||||
|
||||
valid := make([]DatasetInfo, 0, len(datasets))
|
||||
for _, ds := range datasets {
|
||||
shouldDelete := false
|
||||
|
||||
// Check age
|
||||
if !ds.LastAccess.IsZero() && time.Since(ds.LastAccess) > maxAge {
|
||||
logger.Info("dataset is old, marking for deletion",
|
||||
"dataset", ds.Name,
|
||||
"last_access", ds.LastAccess,
|
||||
"age_hours", time.Since(ds.LastAccess).Hours())
|
||||
shouldDelete = true
|
||||
name := strings.TrimSpace(ds.Name)
|
||||
if err := container.ValidateJobName(name); err != nil {
|
||||
logger.Warn("skipping dataset with invalid name", "dataset", ds.Name)
|
||||
continue
|
||||
}
|
||||
ds.Name = name
|
||||
valid = append(valid, ds)
|
||||
}
|
||||
|
||||
// Check if over size limit
|
||||
if totalSize > maxSize {
|
||||
logger.Info("over size limit, deleting oldest dataset",
|
||||
"dataset", ds.Name,
|
||||
"current_size_gb", totalSizeGB,
|
||||
"max_size_gb", dm.config.MaxSizeGB)
|
||||
shouldDelete = true
|
||||
archivedSet := make(map[string]struct{}, len(valid))
|
||||
var archived []string
|
||||
archiveOne := func(ds DatasetInfo, reason string) {
|
||||
if _, ok := archivedSet[ds.Name]; ok {
|
||||
return
|
||||
}
|
||||
|
||||
if shouldDelete {
|
||||
path := filepath.Join(dm.config.MLDataDir, ds.Name)
|
||||
logger.Info("deleting dataset", "dataset", ds.Name, "path", path)
|
||||
|
||||
if _, err := dm.mlServer.Exec(fmt.Sprintf("rm -rf %s", path)); err != nil {
|
||||
logger.Error("failed to delete dataset",
|
||||
path := filepath.Join(dm.config.MLDataDir, ds.Name)
|
||||
logger.Info("archiving dataset", "dataset", ds.Name, "path", path, "reason", reason)
|
||||
if _, err := dm.archiveDatasetOnML(ds.Name); err != nil {
|
||||
logger.Error("failed to archive dataset",
|
||||
"dataset", ds.Name,
|
||||
"error", err)
|
||||
return
|
||||
}
|
||||
archivedSet[ds.Name] = struct{}{}
|
||||
archived = append(archived, ds.Name)
|
||||
totalSize -= ds.SizeBytes
|
||||
totalSizeGB = float64(totalSize) / (1024 * 1024 * 1024)
|
||||
if dm.taskQueue != nil {
|
||||
redisClient := dm.taskQueue.GetRedisClient()
|
||||
if err := redisClient.Del(dm.ctx, fmt.Sprintf("ml:dataset:%s", ds.Name)).Err(); err != nil {
|
||||
logger.Warn("failed to delete dataset from Redis",
|
||||
"dataset", ds.Name,
|
||||
"error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
deleted = append(deleted, ds.Name)
|
||||
totalSize -= ds.SizeBytes
|
||||
|
||||
// FIXED: Remove from Redis only if available, with error handling
|
||||
if dm.taskQueue != nil {
|
||||
redisClient := dm.taskQueue.GetRedisClient()
|
||||
if err := redisClient.Del(dm.ctx, fmt.Sprintf("ml:dataset:%s", ds.Name)).Err(); err != nil {
|
||||
logger.Warn("failed to delete dataset from Redis",
|
||||
"dataset", ds.Name,
|
||||
"error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(deleted) > 0 {
|
||||
// First archive datasets older than maxAge.
|
||||
now := time.Now()
|
||||
for _, ds := range valid {
|
||||
if ds.LastAccess.IsZero() {
|
||||
continue
|
||||
}
|
||||
if now.Sub(ds.LastAccess) > maxAge {
|
||||
archiveOne(ds, "max_age")
|
||||
}
|
||||
}
|
||||
|
||||
// Then archive additional oldest datasets until we're under maxSize.
|
||||
for totalSize > maxSize {
|
||||
found := false
|
||||
for _, ds := range valid {
|
||||
if _, ok := archivedSet[ds.Name]; ok {
|
||||
continue
|
||||
}
|
||||
archiveOne(ds, "max_size")
|
||||
found = true
|
||||
break
|
||||
}
|
||||
if !found {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(archived) > 0 {
|
||||
logger.Info("cleanup complete",
|
||||
"deleted_count", len(deleted),
|
||||
"deleted_datasets", deleted)
|
||||
"archived_count", len(archived),
|
||||
"archived_datasets", archived)
|
||||
} else {
|
||||
logger.Info("cleanup complete", "deleted_count", 0)
|
||||
logger.Info("cleanup complete", "archived_count", 0)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
@ -625,17 +705,29 @@ func (dm *DataManager) Close() {
|
|||
|
||||
if dm.mlServer != nil {
|
||||
if err := dm.mlServer.Close(); err != nil {
|
||||
dm.logger.Job(dm.ctx, "data_manager", "").Warn("error closing ML server connection", "error", err)
|
||||
dm.logger.Job(dm.ctx, "data_manager", "").Warn(
|
||||
"error closing ML server connection",
|
||||
"error",
|
||||
err,
|
||||
)
|
||||
}
|
||||
}
|
||||
if dm.nasServer != nil {
|
||||
if err := dm.nasServer.Close(); err != nil {
|
||||
dm.logger.Job(dm.ctx, "data_manager", "").Warn("error closing NAS server connection", "error", err)
|
||||
dm.logger.Job(dm.ctx, "data_manager", "").Warn(
|
||||
"error closing NAS server connection",
|
||||
"error",
|
||||
err,
|
||||
)
|
||||
}
|
||||
}
|
||||
if dm.taskQueue != nil {
|
||||
if err := dm.taskQueue.Close(); err != nil {
|
||||
dm.logger.Job(dm.ctx, "data_manager", "").Warn("error closing Redis connection", "error", err)
|
||||
dm.logger.Job(dm.ctx, "data_manager", "").Warn(
|
||||
"error closing Redis connection",
|
||||
"error",
|
||||
err,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -650,7 +742,7 @@ func main() {
|
|||
// Get API key from various sources
|
||||
apiKey := auth.GetAPIKeyFromSources(authFlags)
|
||||
|
||||
configFile := "configs/config-local.yaml"
|
||||
configFile := "configs/api/dev.yaml"
|
||||
if authFlags.ConfigFile != "" {
|
||||
configFile = authFlags.ConfigFile
|
||||
}
|
||||
|
|
@ -658,12 +750,12 @@ func main() {
|
|||
// Parse command line args
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Println("Usage:")
|
||||
fmt.Println(" data_manager [--config configs/config-local.yaml] [--api-key <key>] " +
|
||||
fmt.Println(" data_manager [--config configs/api/dev.yaml] [--api-key <key>] " +
|
||||
"fetch <job-name> <dataset> [dataset...]")
|
||||
fmt.Println(" data_manager [--config configs/config-local.yaml] [--api-key <key>] list")
|
||||
fmt.Println(" data_manager [--config configs/config-local.yaml] [--api-key <key>] cleanup")
|
||||
fmt.Println(" data_manager [--config configs/config-local.yaml] [--api-key <key>] validate <dataset>")
|
||||
fmt.Println(" data_manager [--config configs/config-local.yaml] [--api-key <key>] daemon")
|
||||
fmt.Println(" data_manager [--config configs/api/dev.yaml] [--api-key <key>] list")
|
||||
fmt.Println(" data_manager [--config configs/api/dev.yaml] [--api-key <key>] cleanup")
|
||||
fmt.Println(" data_manager [--config configs/api/dev.yaml] [--api-key <key>] validate <dataset>")
|
||||
fmt.Println(" data_manager [--config configs/api/dev.yaml] [--api-key <key>] daemon")
|
||||
fmt.Println()
|
||||
auth.PrintAuthHelp()
|
||||
os.Exit(1)
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ func main() {
|
|||
|
||||
for _, user := range users {
|
||||
insert := `
|
||||
INSERT OR REPLACE INTO api_keys (user_id, key_hash, admin, roles, permissions)
|
||||
INSERT OR REPLACE INTO api_keys (user_id, key_hash, admin, roles, permissions)
|
||||
VALUES (?, ?, ?, ?, ?)`
|
||||
|
||||
if _, err := db.ExecContext(context.Background(), insert,
|
||||
|
|
|
|||
Loading…
Reference in a new issue