// Change summary: add logs-and-debug end-to-end tests, test helper
// utilities, improved test fixtures and templates, updated API server and
// config lint commands, and multi-user database initialization.
package api_test

import (
	"testing"

	"github.com/jfraeys/fetch_ml/internal/api/helpers"
	"github.com/jfraeys/fetch_ml/internal/queue"
)

// ProcessTest demonstrates the duplicate detection process step by step
|
|
func TestDuplicateDetectionProcess(t *testing.T) {
|
|
t.Log("=== Duplicate Detection Process Test ===")
|
|
|
|
// Step 1: First job submission
|
|
t.Log("\n1. First job submission:")
|
|
commitID := "abc123def456"
|
|
args1 := "--epochs 10 --lr 0.001"
|
|
datasets := []queue.DatasetSpec{{Name: "mnist", Checksum: "sha256:abc123"}}
|
|
|
|
datasetID1 := helpers.ComputeDatasetID(datasets, nil)
|
|
paramsHash1 := helpers.ComputeParamsHash(args1)
|
|
|
|
t.Logf(" Commit ID: %s", commitID)
|
|
t.Logf(" Dataset ID: %s (computed from %d datasets)", datasetID1, len(datasets))
|
|
t.Logf(" Params Hash: %s (computed from args: %s)", paramsHash1, args1)
|
|
t.Logf(" Composite Key: (%s, %s, %s)", commitID, datasetID1, paramsHash1)
|
|
|
|
// Step 2: Second job with SAME parameters (should be duplicate)
|
|
t.Log("\n2. Second job submission (same params):")
|
|
args2 := "--epochs 10 --lr 0.001" // Same args
|
|
datasets2 := []queue.DatasetSpec{{Name: "mnist", Checksum: "sha256:abc123"}} // Same dataset
|
|
|
|
datasetID2 := helpers.ComputeDatasetID(datasets2, nil)
|
|
paramsHash2 := helpers.ComputeParamsHash(args2)
|
|
|
|
t.Logf(" Commit ID: %s", commitID)
|
|
t.Logf(" Dataset ID: %s", datasetID2)
|
|
t.Logf(" Params Hash: %s", paramsHash2)
|
|
t.Logf(" Composite Key: (%s, %s, %s)", commitID, datasetID2, paramsHash2)
|
|
|
|
// Verify they're the same
|
|
if datasetID1 == datasetID2 && paramsHash1 == paramsHash2 {
|
|
t.Log(" ✓ DUPLICATE DETECTED - same composite key!")
|
|
} else {
|
|
t.Error(" ✗ Should have been detected as duplicate")
|
|
}
|
|
|
|
// Step 3: Third job with DIFFERENT parameters (not duplicate)
|
|
t.Log("\n3. Third job submission (different params):")
|
|
args3 := "--epochs 20 --lr 0.01" // Different args
|
|
datasets3 := []queue.DatasetSpec{{Name: "mnist", Checksum: "sha256:abc123"}} // Same dataset
|
|
|
|
datasetID3 := helpers.ComputeDatasetID(datasets3, nil)
|
|
paramsHash3 := helpers.ComputeParamsHash(args3)
|
|
|
|
t.Logf(" Commit ID: %s", commitID)
|
|
t.Logf(" Dataset ID: %s", datasetID3)
|
|
t.Logf(" Params Hash: %s", paramsHash3)
|
|
t.Logf(" Composite Key: (%s, %s, %s)", commitID, datasetID3, paramsHash3)
|
|
|
|
// Verify they're different
|
|
if paramsHash1 != paramsHash3 {
|
|
t.Log(" ✓ NOT A DUPLICATE - different params_hash")
|
|
} else {
|
|
t.Error(" ✗ Should have different params_hash")
|
|
}
|
|
|
|
// Step 4: Fourth job with DIFFERENT dataset (not duplicate)
|
|
t.Log("\n4. Fourth job submission (different dataset):")
|
|
args4 := "--epochs 10 --lr 0.001" // Same args
|
|
datasets4 := []queue.DatasetSpec{{Name: "cifar10", Checksum: "sha256:def456"}} // Different dataset
|
|
|
|
datasetID4 := helpers.ComputeDatasetID(datasets4, nil)
|
|
paramsHash4 := helpers.ComputeParamsHash(args4)
|
|
|
|
t.Logf(" Commit ID: %s", commitID)
|
|
t.Logf(" Dataset ID: %s", datasetID4)
|
|
t.Logf(" Params Hash: %s", paramsHash4)
|
|
t.Logf(" Composite Key: (%s, %s, %s)", commitID, datasetID4, paramsHash4)
|
|
|
|
// Verify they're different
|
|
if datasetID1 != datasetID4 {
|
|
t.Log(" ✓ NOT A DUPLICATE - different dataset_id")
|
|
} else {
|
|
t.Error(" ✗ Should have different dataset_id")
|
|
}
|
|
|
|
// Step 5: Summary
|
|
t.Log("\n5. Summary:")
|
|
t.Log(" - Jobs 1 & 2: Same commit_id + dataset_id + params_hash = DUPLICATE")
|
|
t.Log(" - Job 3: Different params_hash = NOT DUPLICATE")
|
|
t.Log(" - Job 4: Different dataset_id = NOT DUPLICATE")
|
|
t.Log("\n The composite key (commit_id, dataset_id, params_hash) ensures")
|
|
t.Log(" only truly identical experiments are flagged as duplicates.")
|
|
}
|