fetch_ml/tests/integration/consistency/cmd/update.go
Jeremie Fraeys a239f3a14f
test(consistency): add dataset hash consistency test suite
Add cross-implementation consistency tests for dataset hash functionality:

## Test Fixtures
- Single file, nested directories, and multiple file test cases
- Expected hashes in JSON format for validation

## Test Infrastructure
- harness.go: Common test utilities and reference implementation runner
- dataset_hash_test.go: Consistency test cases comparing implementations
- cmd/update.go: Tool to regenerate expected hashes from reference

## Purpose
Ensures hash implementations (Go, C++, Zig) produce identical results
across all supported platforms and implementations.
2026-03-05 14:41:14 -05:00

97 lines
2.7 KiB
Go

// Command update computes expected hashes for fixtures using the reference Go
// implementation and rewrites expected_hashes.json in place when any hash changes.
package main
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"github.com/jfraeys/fetch_ml/tests/integration/consistency"
)
// main regenerates the dataset_hash expected-hash file from the reference Go
// implementation.
//
// It reads tests/fixtures/consistency/dataset_hash/expected_hashes.json
// (resolved relative to the current working directory — presumably the
// repository root; confirm before running from elsewhere), recomputes the
// dataset hash of every listed fixture whose input directory exists, fills in
// per-file content hashes that are still "TODO" or empty, and writes the file
// back only when something changed.
//
// Exit status:
//   - 0 when every fixture that was found hashed cleanly (missing fixtures
//     are skipped and reported, not treated as failures);
//   - 1 when the expected-hash file cannot be read, parsed, marshalled or
//     written, or when any fixture or file could not be hashed. In the
//     latter case the hashes that were computed successfully are still
//     written before exiting.
func main() {
	fixturesDir := filepath.Join("tests", "fixtures", "consistency")

	// Load current expected hashes
	expectedPath := filepath.Join(fixturesDir, "dataset_hash", "expected_hashes.json")
	data, err := os.ReadFile(expectedPath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to read expected hashes: %v\n", err)
		os.Exit(1)
	}

	var expected consistency.ExpectedHashes
	if err := json.Unmarshal(data, &expected); err != nil {
		fmt.Fprintf(os.Stderr, "Failed to parse expected hashes: %v\n", err)
		os.Exit(1)
	}

	// Use Go implementation as reference
	goImpl := consistency.NewGoImpl()

	updated := false // at least one hash in expected was changed
	failed := false  // at least one fixture or file could not be hashed

	for i, fixture := range expected.Fixtures {
		fixturePath := filepath.Join(fixturesDir, "dataset_hash", fixture.ID, "input")

		// A missing fixture directory is skipped; any other stat failure
		// (e.g. permissions) is a real error and must not be ignored.
		if _, err := os.Stat(fixturePath); err != nil {
			if os.IsNotExist(err) {
				fmt.Printf("Skipping %s: fixture not found at %s\n", fixture.ID, fixturePath)
				continue
			}
			fmt.Fprintf(os.Stderr, "Error checking %s: %v\n", fixture.ID, err)
			failed = true
			continue
		}

		// Compute hash using reference implementation
		hash, err := goImpl.HashDataset(fixturePath)
		if err != nil {
			// Keep going so the remaining fixtures are still processed, but
			// remember the failure so the process exits non-zero.
			fmt.Fprintf(os.Stderr, "Error hashing %s: %v\n", fixture.ID, err)
			failed = true
			continue
		}

		// Update if the stored value is the placeholder or differs from the
		// freshly computed hash. Writes go through expected.Fixtures[i]
		// because fixture is a copy of the slice element.
		switch {
		case fixture.ExpectedHash == "TODO_COMPUTE":
			fmt.Printf("%s: computed %s\n", fixture.ID, hash)
			expected.Fixtures[i].ExpectedHash = hash
			updated = true
		case fixture.ExpectedHash != hash:
			fmt.Printf("%s: updated %s -> %s\n", fixture.ID, fixture.ExpectedHash, hash)
			expected.Fixtures[i].ExpectedHash = hash
			updated = true
		default:
			fmt.Printf("%s: unchanged (%s)\n", fixture.ID, hash)
		}

		// Compute individual file hashes, but only for entries that have not
		// been filled in yet; existing content hashes are left untouched.
		for j, file := range fixture.Files {
			if file.ContentHash != "TODO" && file.ContentHash != "" {
				continue
			}
			filePath := filepath.Join(fixturePath, file.Path)
			fileHash, err := goImpl.HashFile(filePath)
			if err != nil {
				fmt.Fprintf(os.Stderr, " %s: error - %v\n", file.Path, err)
				failed = true
				continue
			}
			fmt.Printf(" %s: %s\n", file.Path, fileHash)
			expected.Fixtures[i].Files[j].ContentHash = fileHash
			updated = true
		}
	}

	if updated {
		// Write updated hashes
		output, err := json.MarshalIndent(expected, "", " ")
		if err != nil {
			fmt.Fprintf(os.Stderr, "Failed to marshal updated hashes: %v\n", err)
			os.Exit(1)
		}
		if err := os.WriteFile(expectedPath, output, 0644); err != nil {
			fmt.Fprintf(os.Stderr, "Failed to write updated hashes: %v\n", err)
			os.Exit(1)
		}
		fmt.Println("\nUpdated expected_hashes.json")
	} else {
		fmt.Println("\nNo updates needed.")
	}

	// Surface hashing failures to the caller via the exit status so scripts
	// and CI do not mistake a partial regeneration for a complete one.
	if failed {
		fmt.Fprintln(os.Stderr, "One or more fixtures could not be hashed; see errors above.")
		os.Exit(1)
	}
}