fetch_ml/tests/unit/manifest/schema_test.go
Jeremie Fraeys d87c556afa
test(all): update test suite for scheduler and security features
Update comprehensive test coverage:
- E2E tests with scheduler integration
- Integration tests with tenant isolation
- Unit tests with security assertions
- Security tests with audit validation
- Audit verification tests
- Auth tests with tenant scoping
- Config validation tests
- Container security tests
- Worker tests with scheduler mock
- Environment pool tests
- Load tests with distributed patterns
- Test fixtures with scheduler support
- Update go.mod/go.sum with new dependencies
2026-02-26 12:08:46 -05:00

327 lines
9.7 KiB
Go

package manifest
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"os"
"path/filepath"
"runtime"
"testing"
"github.com/jfraeys/fetch_ml/internal/manifest"
"github.com/xeipuuv/gojsonschema"
)
// TestSchemaUnchanged is a sanity check on the committed schema.json: the file
// must be readable, must parse as JSON, and must carry the top-level "version"
// and "title" keys.
//
// It does not yet compare the committed schema with one generated from the
// manifest structs, so struct drift alone will not make this test fail.
// TODO: once GenerateSchemaFromStructs() derives the schema from the Go types,
// compare its output against the committed file here.
func TestSchemaUnchanged(t *testing.T) {
	// Locate schema.json relative to this source file: three directories up,
	// then down into internal/manifest.
	_, thisFile, _, _ := runtime.Caller(0)
	committedPath := filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "internal", "manifest", "schema.json")

	raw, err := os.ReadFile(committedPath)
	if err != nil {
		t.Fatalf("failed to read committed schema: %v", err)
	}

	// Decode into a generic map so the content can be inspected and re-encoded.
	var parsed map[string]any
	if err = json.Unmarshal(raw, &parsed); err != nil {
		t.Fatalf("failed to parse committed schema: %v", err)
	}

	// Re-encode so the logged hash does not depend on the file's formatting.
	canonical, err := json.MarshalIndent(parsed, "", " ")
	if err != nil {
		t.Fatalf("failed to normalize schema: %v", err)
	}

	// Top-level keys every committed schema is expected to declare.
	required := []struct {
		key     string
		failure string
	}{
		{key: "version", failure: "schema missing version field"},
		{key: "title", failure: "schema missing title field"},
	}
	for _, req := range required {
		if _, present := parsed[req.key]; !present {
			t.Error(req.failure)
		}
	}

	// The hash is informational only; nothing is asserted against it.
	digest := sha256.Sum256(canonical)
	t.Logf("Normalized schema hash: %s", hex.EncodeToString(digest[:]))
}
// TestSchemaValidatesExampleManifest checks that a well-formed manifest is
// accepted when validated against the committed schema.json.
func TestSchemaValidatesExampleManifest(t *testing.T) {
	// Locate schema.json relative to this source file.
	_, thisFile, _, _ := runtime.Caller(0)
	schemaFile := filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "internal", "manifest", "schema.json")

	loader, err := loadSchemaFromFile(schemaFile)
	if err != nil {
		t.Fatalf("failed to load schema: %v", err)
	}

	// Assemble the example manifest section by section.
	environment := map[string]any{
		"config_hash":          "abc123def456",
		"gpu_count":            2,
		"gpu_detection_method": "nvml",
		"max_workers":          4,
		"sandbox_network_mode": "bridge",
		"sandbox_no_new_privs": true,
		"compliance_mode":      "standard",
	}
	modelFile := map[string]any{
		"path":       "model.pt",
		"size_bytes": 1024,
		"modified":   "2026-02-23T12:00:00Z",
	}
	artifacts := map[string]any{
		"discovery_time":   "2026-02-23T12:00:00Z",
		"files":            []map[string]any{modelFile},
		"total_size_bytes": 1024,
		"exclusions":       []map[string]any{},
	}
	document := map[string]any{
		"run_id":      "test-run-123",
		"task_id":     "test-task-456",
		"job_name":    "test-job",
		"created_at":  "2026-02-23T12:00:00Z",
		"environment": environment,
		"artifacts":   artifacts,
	}

	payload, err := json.Marshal(document)
	if err != nil {
		t.Fatalf("failed to marshal example manifest: %v", err)
	}

	outcome, err := gojsonschema.Validate(loader, gojsonschema.NewBytesLoader(payload))
	if err != nil {
		t.Fatalf("schema validation error: %v", err)
	}
	if outcome.Valid() {
		return
	}

	// Collect every violation so one failure message shows the full picture.
	var problems []string
	for _, violation := range outcome.Errors() {
		problems = append(problems, violation.String())
	}
	t.Errorf("example manifest failed validation: %v", problems)
}
// TestSchemaRejectsInvalidManifest checks that manifests which break the
// schema (a missing required field, an unknown enum value, a negative size)
// fail validation against the committed schema.json.
func TestSchemaRejectsInvalidManifest(t *testing.T) {
	// Locate schema.json relative to this source file.
	_, thisFile, _, _ := runtime.Caller(0)
	schemaFile := filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "internal", "manifest", "schema.json")

	loader, err := loadSchemaFromFile(schemaFile)
	if err != nil {
		t.Fatalf("failed to load schema: %v", err)
	}

	// Timestamp shared by every fixture below.
	const stamp = "2026-02-23T12:00:00Z"

	// merge copies every entry of extra into base and returns base.
	merge := func(base, extra map[string]any) map[string]any {
		for key, value := range extra {
			base[key] = value
		}
		return base
	}
	// identified returns the four top-level identity fields plus extra.
	identified := func(extra map[string]any) map[string]any {
		return merge(map[string]any{
			"run_id":     "test-run",
			"task_id":    "test-task",
			"job_name":   "test-job",
			"created_at": stamp,
		}, extra)
	}
	// sandboxEnv returns the environment fields common to all cases plus extra.
	// config_hash is deliberately not part of the base so a case can omit it.
	sandboxEnv := func(extra map[string]any) map[string]any {
		return merge(map[string]any{
			"gpu_count":            0,
			"max_workers":          4,
			"sandbox_network_mode": "bridge",
			"sandbox_no_new_privs": true,
		}, extra)
	}

	cases := []struct {
		name     string
		manifest map[string]any
	}{
		{
			name: "missing required field run_id",
			manifest: map[string]any{
				"task_id":    "test-task",
				"job_name":   "test-job",
				"created_at": stamp,
			},
		},
		{
			name: "missing required environment.config_hash",
			manifest: identified(map[string]any{
				// config_hash is missing
				"environment": sandboxEnv(nil),
			}),
		},
		{
			name: "invalid compliance_mode value",
			manifest: identified(map[string]any{
				"environment": sandboxEnv(map[string]any{
					"config_hash":     "abc123",
					"compliance_mode": "invalid_mode",
				}),
			}),
		},
		{
			name: "negative size_bytes in artifact",
			manifest: identified(map[string]any{
				"environment": sandboxEnv(map[string]any{
					"config_hash": "abc123",
				}),
				"artifacts": map[string]any{
					"discovery_time": stamp,
					"files": []map[string]any{
						{
							"path":       "model.pt",
							"size_bytes": -1, // Invalid: negative
							"modified":   stamp,
						},
					},
				},
			}),
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			payload, err := json.Marshal(tt.manifest)
			if err != nil {
				t.Fatalf("failed to marshal manifest: %v", err)
			}

			outcome, err := gojsonschema.Validate(loader, gojsonschema.NewBytesLoader(payload))
			if err != nil {
				t.Fatalf("schema validation error: %v", err)
			}

			// Each fixture is intentionally broken, so passing validation is the failure.
			if outcome.Valid() {
				t.Errorf("expected validation to fail for %s, but it passed", tt.name)
			}
		})
	}
}
// TestSchemaVersionMatchesConst checks that the "version" value in schema.json
// equals the manifest.SchemaVersion constant.
func TestSchemaVersionMatchesConst(t *testing.T) {
	// The schema lives in internal/manifest, not next to this test file, so
	// walk three directories up from here before descending.
	_, thisFile, _, _ := runtime.Caller(0)
	schemaFile := filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "internal", "manifest", "schema.json")

	raw, err := os.ReadFile(schemaFile)
	if err != nil {
		t.Fatalf("failed to read schema: %v", err)
	}

	var doc map[string]any
	if err = json.Unmarshal(raw, &doc); err != nil {
		t.Fatalf("failed to parse schema: %v", err)
	}

	// A missing key and a non-string value both fail the type assertion.
	declared, isString := doc["version"].(string)
	switch {
	case !isString:
		t.Fatalf("schema does not have a version field")
	case declared != manifest.SchemaVersion:
		t.Errorf("schema version mismatch: schema.json has %s, but schema_version.go has %s",
			declared, manifest.SchemaVersion)
	}
}
// loadSchemaFromFile reads the file at path and wraps its bytes in a
// gojsonschema loader. The read error is returned unchanged when the file
// cannot be read; the contents are not parsed here.
func loadSchemaFromFile(path string) (gojsonschema.JSONLoader, error) {
	schemaBytes, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	loader := gojsonschema.NewBytesLoader(schemaBytes)
	return loader, nil
}
// GenerateSchemaFromStructs returns the committed schema.json decoded into a
// generic map.
//
// It is a placeholder: despite its name, nothing is derived from the Go
// structs yet. A production implementation would:
//  1. Use reflection to analyze the RunManifest, Artifacts and
//     ExecutionEnvironment structs.
//  2. Generate JSON schema properties from struct tags.
//  3. Extract required fields from validation logic.
//  4. Build enum values from constants.
//
// The result is always non-nil. An empty map is returned when the schema file
// cannot be read, is not valid JSON, or does not hold a JSON object.
func GenerateSchemaFromStructs() map[string]any {
	// Locate schema.json relative to this source file: three directories up,
	// then down into internal/manifest.
	_, testFile, _, _ := runtime.Caller(0)
	schemaPath := filepath.Join(filepath.Dir(testFile), "..", "..", "..", "internal", "manifest", "schema.json")

	data, err := os.ReadFile(schemaPath)
	if err != nil {
		return map[string]any{}
	}

	// Decoding into a map discards the file's formatting and key order, so the
	// result is already a fresh, normalized value; no marshal/unmarshal round
	// trip is needed to "clean" it.
	var schema map[string]any
	if err := json.Unmarshal(data, &schema); err != nil {
		return map[string]any{}
	}

	// A literal JSON null decodes to a nil map without an error. Normalize it
	// so every failure path hands back the same empty, writable map.
	if schema == nil {
		return map[string]any{}
	}
	return schema
}
// GenerateSchemaJSON returns the committed schema.json decoded and re-encoded
// as indented JSON bytes, for comparison purposes. It returns nil when the
// file cannot be read or parsed.
func GenerateSchemaJSON() []byte {
	// Locate schema.json relative to this source file.
	_, thisFile, _, _ := runtime.Caller(0)
	location := filepath.Join(filepath.Dir(thisFile), "..", "..", "..", "internal", "manifest", "schema.json")

	raw, readErr := os.ReadFile(location)
	if readErr != nil {
		return nil
	}

	var decoded map[string]any
	if parseErr := json.Unmarshal(raw, &decoded); parseErr != nil {
		return nil
	}

	return jsonMustMarshalIndent(decoded, "", " ")
}
// jsonMustMarshalIndent encodes v as indented JSON using the given prefix and
// indent strings. Despite the "Must" in its name it does not panic: a value
// that cannot be encoded yields nil.
func jsonMustMarshalIndent(v any, prefix, indent string) []byte {
	if encoded, err := json.MarshalIndent(v, prefix, indent); err == nil {
		return encoded
	}
	return nil
}