fetch_ml/tests/integration/duplicate_detection_test.go
2026-02-23 14:14:21 -05:00

63 lines
2.1 KiB
Go

package tests
import (
"testing"
"github.com/jfraeys/fetch_ml/internal/api/helpers"
"github.com/jfraeys/fetch_ml/internal/queue"
)
// TestDuplicateDetection verifies the building blocks of duplicate-job
// detection: helpers.ComputeParamsHash and helpers.ComputeDatasetID must
// return equal values for equal inputs and distinct values for the distinct
// inputs used here, and the resulting (commit, dataset_id, params_hash)
// composite keys must match or differ accordingly.
func TestDuplicateDetection(t *testing.T) {
	// Every job in this test shares one commit, so duplicate status is decided
	// by the dataset ID and the params hash alone.
	const commitID = "abc123def4567890abc1"

	// Test 1: Same args + same commit = duplicate (would be detected)
	args1 := "--epochs 10 --lr 0.001"
	args2 := "--epochs 10 --lr 0.001" // Same args
	hash1 := helpers.ComputeParamsHash(args1)
	hash2 := helpers.ComputeParamsHash(args2)
	if hash1 != hash2 {
		t.Errorf("Same args should produce same hash: got %s and %s", hash1, hash2)
	}
	t.Logf("Same args produce same hash: %s", hash1)

	// Test 2: Different args = not duplicate
	args3 := "--epochs 20 --lr 0.01" // Different
	hash3 := helpers.ComputeParamsHash(args3)
	if hash1 == hash3 {
		t.Errorf("Different args should produce different hashes: both are %s", hash1)
	}
	t.Logf("Different args produce different hashes: %s vs %s", hash1, hash3)

	// Test 3: Same dataset specs = same dataset_id
	// NOTE(review): the second argument to ComputeDatasetID is passed as nil
	// throughout; its meaning is not visible from this file.
	ds1 := []queue.DatasetSpec{{Name: "mnist", Checksum: "sha256:abc123"}}
	ds2 := []queue.DatasetSpec{{Name: "mnist", Checksum: "sha256:abc123"}}
	id1 := helpers.ComputeDatasetID(ds1, nil)
	id2 := helpers.ComputeDatasetID(ds2, nil)
	if id1 != id2 {
		t.Errorf("Same dataset specs should produce same ID: got %s and %s", id1, id2)
	}
	t.Logf("Same dataset specs produce same ID: %s", id1)

	// Test 4: Different dataset = different ID
	ds3 := []queue.DatasetSpec{{Name: "cifar10", Checksum: "sha256:def456"}}
	id3 := helpers.ComputeDatasetID(ds3, nil)
	if id1 == id3 {
		t.Errorf("Different datasets should produce different IDs: both are %s", id1)
	}
	t.Logf("Different datasets produce different IDs: %s vs %s", id1, id3)

	// Test 5: Composite key logic. The verdict lines are only logged when the
	// keys really compare as described; otherwise the test fails instead of
	// printing a claim that the computed values contradict.
	t.Log("\n=== Composite Key Detection ===")
	t.Logf("Job 1: commit=%s, dataset_id=%s, params_hash=%s", commitID, id1, hash1)
	t.Logf("Job 2: commit=%s, dataset_id=%s, params_hash=%s", commitID, id2, hash2)
	if id1 == id2 && hash1 == hash2 {
		t.Log("→ These would be detected as DUPLICATE (same commit + dataset + params)")
	} else {
		t.Error("Jobs 1 and 2 should share a composite key (same commit + dataset + params)")
	}

	// Job 3 reuses job 1's commit and dataset ID, so only the params hash can
	// distinguish the two keys.
	t.Logf("Job 3: commit=%s, dataset_id=%s, params_hash=%s", commitID, id1, hash3)
	if hash1 != hash3 {
		t.Log("→ This would NOT be duplicate (different params)")
	} else {
		t.Error("Job 3 should not share a composite key with job 1 (different params)")
	}
}