Integration test updates: - jupyter_experiment_test.go: update for new workspace handling - run_manifest_test.go: reproducibility manifest validation - secrets_integration_test.go: KMS and secret provider tests - storage_redis_integration_test.go: Redis-backed storage tests Unit test updates: - response_helpers_test.go: API response helper tests - config_hash_test.go: configuration hashing for reproducibility - filetype_test.go: security file type detection tests Load testing: - load_test.go: scheduler load and stress tests
105 lines
3 KiB
Go
105 lines
3 KiB
Go
package reproducibility
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/manifest"
|
|
"github.com/jfraeys/fetch_ml/internal/worker"
|
|
)
|
|
|
|
// TestRunManifestReproducibility verifies that two identical runs produce
|
|
// manifests that can be compared for reproducibility
|
|
func TestRunManifestReproducibility(t *testing.T) {
|
|
t.Run("IdenticalRunsProduceComparableManifests", func(t *testing.T) {
|
|
// Create two run directories with identical content
|
|
run1Dir := t.TempDir()
|
|
run2Dir := t.TempDir()
|
|
|
|
// Create identical config
|
|
cfg := &worker.Config{
|
|
Host: "localhost",
|
|
Port: 22,
|
|
MaxWorkers: 4,
|
|
GPUVendor: "none",
|
|
ComplianceMode: "standard",
|
|
Sandbox: worker.SandboxConfig{
|
|
NetworkMode: "none",
|
|
SeccompProfile: "default-hardened",
|
|
NoNewPrivileges: true,
|
|
},
|
|
}
|
|
cfg.Sandbox.ApplySecurityDefaults()
|
|
|
|
// Compute config hash (should be identical for identical configs)
|
|
hash1, err := cfg.ComputeResolvedConfigHash()
|
|
if err != nil {
|
|
t.Fatalf("Failed to compute hash for run 1: %v", err)
|
|
}
|
|
|
|
hash2, err := cfg.ComputeResolvedConfigHash()
|
|
if err != nil {
|
|
t.Fatalf("Failed to compute hash for run 2: %v", err)
|
|
}
|
|
|
|
if hash1 != hash2 {
|
|
t.Error("Identical configs should produce identical hashes")
|
|
}
|
|
|
|
// Create identical output files
|
|
for _, dir := range []string{run1Dir, run2Dir} {
|
|
resultsDir := filepath.Join(dir, "results")
|
|
os.MkdirAll(resultsDir, 0750)
|
|
os.WriteFile(filepath.Join(resultsDir, "metrics.jsonl"), []byte("{\"accuracy\": 0.95}\n"), 0600)
|
|
}
|
|
|
|
// Create manifests with identical environment
|
|
created := time.Now().UTC()
|
|
m1 := manifest.NewRunManifest("run-1", "task-1", "job-1", created)
|
|
m1.Environment = &manifest.ExecutionEnvironment{
|
|
ConfigHash: hash1,
|
|
GPUDetectionMethod: "config",
|
|
MaxWorkers: 4,
|
|
SandboxNetworkMode: "none",
|
|
SandboxNoNewPrivs: true,
|
|
ComplianceMode: "standard",
|
|
}
|
|
|
|
m2 := manifest.NewRunManifest("run-2", "task-2", "job-2", created)
|
|
m2.Environment = &manifest.ExecutionEnvironment{
|
|
ConfigHash: hash2,
|
|
GPUDetectionMethod: "config",
|
|
MaxWorkers: 4,
|
|
SandboxNetworkMode: "none",
|
|
SandboxNoNewPrivs: true,
|
|
ComplianceMode: "standard",
|
|
}
|
|
|
|
// Write manifests
|
|
if err := m1.WriteToDir(run1Dir); err != nil {
|
|
t.Fatalf("Failed to write manifest 1: %v", err)
|
|
}
|
|
if err := m2.WriteToDir(run2Dir); err != nil {
|
|
t.Fatalf("Failed to write manifest 2: %v", err)
|
|
}
|
|
|
|
// Load and compare
|
|
loaded1, err := manifest.LoadFromDir(run1Dir)
|
|
if err != nil {
|
|
t.Fatalf("Failed to load manifest 1: %v", err)
|
|
}
|
|
loaded2, err := manifest.LoadFromDir(run2Dir)
|
|
if err != nil {
|
|
t.Fatalf("Failed to load manifest 2: %v", err)
|
|
}
|
|
|
|
// Compare environments
|
|
if loaded1.Environment.ConfigHash != loaded2.Environment.ConfigHash {
|
|
t.Error("Reproducibility check: ConfigHash should match for identical configs")
|
|
}
|
|
|
|
t.Log("Run manifest reproducibility verified: identical configs produce comparable manifests")
|
|
})
|
|
}
|