fetch_ml/tests/integration/consistency/dataset_hash_test.go

216 lines
5.3 KiB
Go

//go:build cgo
// +build cgo
package consistency
import (
"os"
"path/filepath"
"testing"
)
// TestDatasetHashConsistency runs every fixture listed in the expected-hash
// manifest through the native, Go, and Zig implementations and checks the
// results against the manifest's expected hash for that fixture.
//
// The test fails immediately if the manifest cannot be loaded, and is skipped
// when fewer than two implementations report themselves as available. Each
// fixture is executed as a subtest named after the fixture ID.
func TestDatasetHashConsistency(t *testing.T) {
	fixturesDir := filepath.Join("..", "..", "fixtures", "consistency")

	expected, err := LoadExpectedHashes(fixturesDir)
	if err != nil {
		t.Fatalf("Failed to load expected hashes: %v", err)
	}

	impls := []Implementation{NewNativeImpl(), NewGoImpl(), NewZigImpl()}

	// Count the usable implementations, logging the status of each one.
	usable := 0
	for _, candidate := range impls {
		if !candidate.Available() {
			t.Logf("Implementation not available: %s", candidate.Name())
			continue
		}
		usable++
		t.Logf("Implementation available: %s", candidate.Name())
	}
	if usable < 2 {
		t.Skip("Need at least 2 implementations for consistency testing")
	}

	for _, fixture := range expected.Fixtures {
		// Hand each subtest its own copy so the pointer passed below never
		// refers to the shared range variable.
		current := fixture
		t.Run(current.ID, func(t *testing.T) {
			testFixture(t, fixturesDir, &current, impls)
		})
	}
}
// TestDatasetHashSmoke is a quick check that hashes only the 02_single_file
// fixture with all three implementations and compares the results with a
// hard-coded expected digest.
//
// The test is skipped when the fixture input path does not exist. An error
// returned by ComputeAllHashes is logged but does not by itself fail the test;
// failures are reported only for the mismatches returned by CompareHashes.
func TestDatasetHashSmoke(t *testing.T) {
	const expected = "6dd7e8e932ea9d58555d7fee44a9b01a9bd7448e986636b728ee3711b01f37ce"

	fixturePath := filepath.Join(
		"..", "..", "fixtures", "consistency",
		"dataset_hash", "02_single_file", "input",
	)
	if _, statErr := os.Stat(fixturePath); os.IsNotExist(statErr) {
		t.Skipf("Fixture not found: %s", fixturePath)
	}

	impls := []Implementation{NewNativeImpl(), NewGoImpl(), NewZigImpl()}

	results, err := ComputeAllHashes(fixturePath, impls)
	if err != nil {
		t.Logf("Errors during hash computation: %v", err)
	}

	ok, diffs := CompareHashes(results, expected)
	// Always log the comparison so it is visible with -v even on success.
	t.Logf("\n%s", FormatHashComparison(results, expected))
	if ok {
		return
	}
	for _, diff := range diffs {
		t.Errorf("Mismatch: %s", diff)
	}
}
// TestCrossImplEquivalence checks the implementations against each other
// rather than against stored expected hashes.
//
// The first implementation (in the order Go, native, Zig) that reports itself
// as available becomes the reference; the test is skipped if none is. For each
// of a fixed list of fixtures, run as a subtest, the reference hash is computed
// and every other available implementation must return the same value. A
// fixture whose input path does not exist is skipped.
func TestCrossImplEquivalence(t *testing.T) {
	datasetDir := filepath.Join("..", "..", "fixtures", "consistency", "dataset_hash")
	impls := []Implementation{NewGoImpl(), NewNativeImpl(), NewZigImpl()}

	// Pick the first available implementation as the reference.
	var reference Implementation
	for i := range impls {
		if impls[i].Available() {
			reference = impls[i]
			break
		}
	}
	if reference == nil {
		t.Skip("No implementations available")
	}
	t.Logf("Using %s as reference implementation", reference.Name())

	for _, name := range []string{"02_single_file", "03_nested", "04_multiple_files"} {
		t.Run(name, func(t *testing.T) {
			inputDir := filepath.Join(datasetDir, name, "input")
			if _, statErr := os.Stat(inputDir); os.IsNotExist(statErr) {
				t.Skipf("Fixture not found: %s", inputDir)
			}

			refHash, err := reference.HashDataset(inputDir)
			if err != nil {
				t.Fatalf("Reference implementation failed: %v", err)
			}

			for _, candidate := range impls {
				// The reference is not compared with itself, and
				// unavailable implementations are not run.
				if candidate == reference || !candidate.Available() {
					continue
				}

				got, hashErr := candidate.HashDataset(inputDir)
				switch {
				case hashErr != nil:
					t.Errorf("%s failed: %v", candidate.Name(), hashErr)
				case got != refHash:
					t.Errorf("%s mismatch: got %s, reference (%s) has %s",
						candidate.Name(), got, reference.Name(), refHash)
				default:
					t.Logf("%s matches reference ✓", candidate.Name())
				}
			}
		})
	}
}
// TestEmptyDirectory specifically tests empty directory handling.
//
// Every available implementation must hash the 01_empty_dir fixture to the
// expected digest below, which is the SHA-256 of zero bytes. Each available
// implementation runs as its own subtest named after the implementation;
// unavailable implementations are not run.
//
// The test is skipped when the fixture input path does not exist, in the same
// way as the other tests in this file. This matters particularly here: version
// control systems such as Git do not record empty directories, so the empty
// input directory may be absent from a checkout, and without the guard every
// implementation's subtest would fail for a reason unrelated to hashing.
func TestEmptyDirectory(t *testing.T) {
	fixturesDir := filepath.Join("..", "..", "fixtures", "consistency")
	fixturePath := filepath.Join(fixturesDir, "dataset_hash", "01_empty_dir", "input")
	expected := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

	// Verify fixture exists
	if _, err := os.Stat(fixturePath); os.IsNotExist(err) {
		t.Skipf("Fixture not found: %s", fixturePath)
	}

	impls := []Implementation{
		NewGoImpl(),
		NewNativeImpl(),
		NewZigImpl(),
	}

	for _, impl := range impls {
		if !impl.Available() {
			continue
		}
		t.Run(impl.Name(), func(t *testing.T) {
			hash, err := impl.HashDataset(fixturePath)
			if err != nil {
				t.Fatalf("Failed to hash empty directory: %v", err)
			}
			if hash != expected {
				t.Errorf("Empty directory hash mismatch: got %s, expected %s", hash, expected)
			}
		})
	}
}
// testFixture checks one fixture against the supplied implementations.
//
// The fixture's input path is fixturesDir/dataset_hash/<fixture.ID>/input. The
// calling test is skipped if that path does not exist, or if the fixture's
// expected hash is still the "TODO_COMPUTE" placeholder. Otherwise the hashes
// returned by ComputeAllHashes are compared with fixture.ExpectedHash and every
// mismatch reported by CompareHashes is recorded as a test error. An error
// from ComputeAllHashes itself is only logged.
func testFixture(t *testing.T, fixturesDir string, fixture *Fixture, impls []Implementation) {
	inputDir := filepath.Join(fixturesDir, "dataset_hash", fixture.ID, "input")

	// A missing fixture takes precedence over a placeholder hash.
	if _, statErr := os.Stat(inputDir); os.IsNotExist(statErr) {
		t.Skipf("Fixture not found: %s", inputDir)
	}
	if fixture.ExpectedHash == "TODO_COMPUTE" {
		t.Skipf("Fixture %s has uncomputed expected hash", fixture.ID)
	}

	results, err := ComputeAllHashes(inputDir, impls)
	if err != nil {
		t.Logf("Errors during hash computation: %v", err)
	}

	ok, diffs := CompareHashes(results, fixture.ExpectedHash)

	// Always log the comparison so it is available for debugging.
	t.Logf("\nFixture: %s - %s", fixture.ID, fixture.Name)
	t.Logf("\n%s", FormatHashComparison(results, fixture.ExpectedHash))

	if ok {
		return
	}
	for _, diff := range diffs {
		t.Errorf("%s", diff)
	}
}