Add cross-implementation consistency tests for dataset hash functionality:

## Test Fixtures
- Single file, nested directories, and multiple file test cases
- Expected hashes in JSON format for validation

## Test Infrastructure
- harness.go: Common test utilities and reference implementation runner
- dataset_hash_test.go: Consistency test cases comparing implementations
- cmd/update.go: Tool to regenerate expected hashes from reference

## Purpose
Ensures hash implementations (Go, C++, Zig) produce identical results across all supported platforms and implementations.
97 lines
2.7 KiB
Go
97 lines
2.7 KiB
Go
// Command update recomputes the expected hashes stored in expected_hashes.json
// for the dataset_hash consistency fixtures, using the Go implementation as the reference.
|
|
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"github.com/jfraeys/fetch_ml/tests/integration/consistency"
|
|
)
|
|
|
|
func main() {
|
|
fixturesDir := filepath.Join("tests", "fixtures", "consistency")
|
|
|
|
// Load current expected hashes
|
|
expectedPath := filepath.Join(fixturesDir, "dataset_hash", "expected_hashes.json")
|
|
data, err := os.ReadFile(expectedPath)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to read expected hashes: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
var expected consistency.ExpectedHashes
|
|
if err := json.Unmarshal(data, &expected); err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to parse expected hashes: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
// Use Go implementation as reference
|
|
goImpl := consistency.NewGoImpl()
|
|
|
|
updated := false
|
|
for i, fixture := range expected.Fixtures {
|
|
fixturePath := filepath.Join(fixturesDir, "dataset_hash", fixture.ID, "input")
|
|
|
|
// Check if fixture exists
|
|
if _, err := os.Stat(fixturePath); os.IsNotExist(err) {
|
|
fmt.Printf("Skipping %s: fixture not found at %s\n", fixture.ID, fixturePath)
|
|
continue
|
|
}
|
|
|
|
// Compute hash using reference implementation
|
|
hash, err := goImpl.HashDataset(fixturePath)
|
|
if err != nil {
|
|
fmt.Printf("Error hashing %s: %v\n", fixture.ID, err)
|
|
continue
|
|
}
|
|
|
|
// Update if different or TODO
|
|
if fixture.ExpectedHash == "TODO_COMPUTE" {
|
|
fmt.Printf("%s: computed %s\n", fixture.ID, hash)
|
|
expected.Fixtures[i].ExpectedHash = hash
|
|
updated = true
|
|
} else if fixture.ExpectedHash != hash {
|
|
fmt.Printf("%s: updated %s -> %s\n", fixture.ID, fixture.ExpectedHash, hash)
|
|
expected.Fixtures[i].ExpectedHash = hash
|
|
updated = true
|
|
} else {
|
|
fmt.Printf("%s: unchanged (%s)\n", fixture.ID, hash)
|
|
}
|
|
|
|
// Compute individual file hashes
|
|
for j, file := range fixture.Files {
|
|
if file.ContentHash == "TODO" || file.ContentHash == "" {
|
|
filePath := filepath.Join(fixturePath, file.Path)
|
|
fileHash, err := goImpl.HashFile(filePath)
|
|
if err != nil {
|
|
fmt.Printf(" %s: error - %v\n", file.Path, err)
|
|
continue
|
|
}
|
|
fmt.Printf(" %s: %s\n", file.Path, fileHash)
|
|
expected.Fixtures[i].Files[j].ContentHash = fileHash
|
|
updated = true
|
|
}
|
|
}
|
|
}
|
|
|
|
if !updated {
|
|
fmt.Println("\nNo updates needed.")
|
|
return
|
|
}
|
|
|
|
// Write updated hashes
|
|
output, err := json.MarshalIndent(expected, "", " ")
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to marshal updated hashes: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
if err := os.WriteFile(expectedPath, output, 0644); err != nil {
|
|
fmt.Fprintf(os.Stderr, "Failed to write updated hashes: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
fmt.Println("\nUpdated expected_hashes.json")
|
|
}
|