fetch_ml/internal/storage/db_jobs_test.go
Jeremie Fraeys 50b6506243
test(storage): add comprehensive storage layer tests
Add tests for:
- dataset: Redis dataset operations, transfer tracking
- db_audit: audit logging with hash chain, access tracking
- db_experiments: experiment metadata, dataset associations
- db_tasks: task listing with pagination for users and groups
- db_jobs: job CRUD, state transitions, worker assignment

Coverage: storage package ~40%+
2026-03-13 23:26:33 -04:00

361 lines
8 KiB
Go

package storage_test
import (
"testing"
"github.com/jfraeys/fetch_ml/internal/storage"
fixtures "github.com/jfraeys/fetch_ml/tests/fixtures"
_ "github.com/mattn/go-sqlite3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestMain is the entry point for this test package. It installs no shared
// fixtures; it only hands control to the test runner.
func TestMain(m *testing.M) {
// Storage tests use per-test setup for isolation
// due to complex schema requirements
// NOTE(review): the result of m.Run is not passed to os.Exit here. The
// testing package does that automatically when TestMain returns on
// Go 1.15+ - confirm the module's minimum Go version.
m.Run()
}
// setupTestDB creates a fresh database for the calling test.
//
// It opens a database file inside a per-test temporary directory, applies
// fixtures.TestSchema, and returns the ready handle. The handle is closed
// automatically when the test ends. Any failure during setup aborts the
// calling test immediately.
func setupTestDB(t *testing.T) *storage.DB {
	t.Helper()

	dbPath := t.TempDir() + "/test.db"
	db, err := storage.NewDBFromPath(dbPath)
	require.NoError(t, err, "Failed to create database")

	// Register the close as soon as the handle exists. If Initialize fails
	// below, require aborts the test, and a cleanup registered afterwards
	// would never run, leaving the handle open.
	t.Cleanup(func() {
		// Best-effort close during teardown; there is nothing useful to do
		// with the error once the test has finished.
		_ = db.Close()
	})

	err = db.Initialize(fixtures.TestSchema)
	require.NoError(t, err, "Failed to initialize database schema")

	return db
}
// TestNewDBFromPath checks that storage.NewDBFromPath returns a usable,
// non-nil handle. A case with an empty path opens a file in a fresh temporary
// directory; a non-empty path is used as given.
func TestNewDBFromPath(t *testing.T) {
	t.Parallel()

	scenarios := []struct {
		name    string
		path    string
		wantErr bool
	}{
		{name: "valid path", path: "", wantErr: false},    // uses temp dir
		{name: "with wal mode", path: "", wantErr: false}, // SQLite with WAL
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			// Fall back to a temporary file when the case gives no path.
			target := sc.path
			if target == "" {
				target = t.TempDir() + "/test.db"
			}

			handle, openErr := storage.NewDBFromPath(target)
			if !sc.wantErr {
				require.NoError(t, openErr)
				require.NotNil(t, handle)
				defer handle.Close()
				return
			}
			require.Error(t, openErr)
		})
	}
}
// TestNewDBFromPathInvalidPath checks that storage.NewDBFromPath reports an
// error when the database file sits under a directory that does not exist.
func TestNewDBFromPathInvalidPath(t *testing.T) {
	t.Parallel()

	db, err := storage.NewDBFromPath("/invalid/path/that/does/not/exist/db.sqlite")
	if err == nil && db != nil {
		// Unexpected success: the assertion below will fail the test, so
		// close the handle here rather than leaking it.
		_ = db.Close()
	}
	require.Error(t, err, "Expected error for invalid path")
}
// TestDBInitialize applies fixtures.TestSchema to a new database and then
// inserts a job, using the successful insert as evidence that the schema
// was created.
func TestDBInitialize(t *testing.T) {
	t.Parallel()

	handle, openErr := storage.NewDBFromPath(t.TempDir() + "/test.db")
	require.NoError(t, openErr)
	defer handle.Close()

	require.NoError(t, handle.Initialize(fixtures.TestSchema), "Failed to initialize schema")

	// Verify tables exist by attempting operations
	createErr := handle.CreateJob(&storage.Job{
		ID:      "init-test",
		JobName: "test",
		Status:  "pending",
	})
	require.NoError(t, createErr, "Should be able to create job after init")
}
// TestDBClose expects the first Close on a freshly opened database to
// succeed and a second Close on the same handle to return an error.
func TestDBClose(t *testing.T) {
	t.Parallel()

	handle, openErr := storage.NewDBFromPath(t.TempDir() + "/test.db")
	require.NoError(t, openErr)

	require.NoError(t, handle.Close(), "Close should not error")

	// Double close should error
	require.Error(t, handle.Close(), "Double close should error")
}
// TestCreateJob tests job creation with various scenarios
func TestCreateJob(t *testing.T) {
t.Parallel()
cases := []struct {
name string
job *storage.Job
wantErr bool
}{
{
name: "valid job",
job: &storage.Job{
ID: "job-1",
JobName: "test_experiment",
Args: "--epochs 10",
Status: "pending",
Priority: 1,
Datasets: []string{"ds1", "ds2"},
Metadata: map[string]string{"user": "test"},
},
wantErr: false,
},
{
name: "minimal job",
job: &storage.Job{
ID: "job-2",
Status: "pending",
},
wantErr: false,
},
{
name: "duplicate id",
job: &storage.Job{
ID: "job-1", // Same as first case
Status: "pending",
},
wantErr: true,
},
}
db := setupTestDB(t)
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
err := db.CreateJob(tc.job)
if tc.wantErr {
require.Error(t, err)
return
}
require.NoError(t, err)
})
}
}
// TestGetJob stores one job and looks it up by ID. A lookup of the stored ID
// must return matching ID, name, status and priority; lookups of an unknown
// ID and of the empty ID are expected to fail.
func TestGetJob(t *testing.T) {
	t.Parallel()

	db := setupTestDB(t)

	// Seed the single job that the lookups below refer to.
	stored := &storage.Job{
		ID:       "get-test",
		JobName:  "retrieve_test",
		Args:     "--batch 32",
		Status:   "running",
		Priority: 5,
		Datasets: []string{"train", "val"},
		Metadata: map[string]string{"gpu": "true"},
	}
	require.NoError(t, db.CreateJob(stored))

	lookups := []struct {
		name    string
		id      string
		wantErr bool
		wantID  string
	}{
		{name: "existing job", id: "get-test", wantErr: false, wantID: "get-test"},
		{name: "nonexistent job", id: "not-found", wantErr: true, wantID: ""},
		{name: "empty id", id: "", wantErr: true, wantID: ""},
	}

	for i := range lookups {
		lk := lookups[i]
		t.Run(lk.name, func(t *testing.T) {
			found, getErr := db.GetJob(lk.id)
			if lk.wantErr {
				require.Error(t, getErr)
				return
			}

			require.NoError(t, getErr)
			assert.Equal(t, lk.wantID, found.ID)
			assert.Equal(t, stored.JobName, found.JobName)
			assert.Equal(t, stored.Status, found.Status)
			assert.Equal(t, stored.Priority, found.Priority)
		})
	}
}
// TestUpdateJobStatusExtended drives one job through a sequence of status
// updates and reads it back after each to check the stored status and worker
// ID. The subtests run in order against the same job. The case marked
// wantErr targets the ID "nonexistent" and expects the update to fail.
func TestUpdateJobStatusExtended(t *testing.T) {
	t.Parallel()

	db := setupTestDB(t)

	// Seed the job that every successful update targets.
	seed := &storage.Job{
		ID:     "update-extended-test",
		Status: "pending",
	}
	require.NoError(t, db.CreateJob(seed))

	steps := []struct {
		name     string
		status   string
		workerID string
		errorMsg string
		wantErr  bool
	}{
		{name: "pending to running", status: "running", workerID: "worker-1", errorMsg: "", wantErr: false},
		{name: "running to completed", status: "completed", workerID: "worker-1", errorMsg: "", wantErr: false},
		{name: "completed to failed", status: "failed", workerID: "worker-1", errorMsg: "oom", wantErr: false},
		{name: "nonexistent job", status: "running", workerID: "", errorMsg: "", wantErr: true},
	}

	for i := range steps {
		step := steps[i]
		t.Run(step.name, func(t *testing.T) {
			// Error cases are pointed at an ID that was never created.
			targetID := "nonexistent"
			if !step.wantErr {
				targetID = seed.ID
			}

			updateErr := db.UpdateJobStatus(targetID, step.status, step.workerID, step.errorMsg)
			if step.wantErr {
				require.Error(t, updateErr)
				return
			}
			require.NoError(t, updateErr)

			// Read the job back and confirm the update was stored.
			current, getErr := db.GetJob(targetID)
			require.NoError(t, getErr)
			assert.Equal(t, step.status, current.Status)
			assert.Equal(t, step.workerID, current.WorkerID)
		})
	}
}
// TestDeleteJob creates a job, deletes it, and confirms a later lookup
// fails. It also expects deleting an ID that was never created to succeed
// without error.
func TestDeleteJob(t *testing.T) {
	t.Parallel()

	db := setupTestDB(t)

	// Create and delete
	require.NoError(t, db.CreateJob(&storage.Job{ID: "delete-me", Status: "pending"}))
	require.NoError(t, db.DeleteJob("delete-me"))

	// Verify deletion
	_, lookupErr := db.GetJob("delete-me")
	require.Error(t, lookupErr, "Deleted job should not be found")

	// Delete nonexistent should not error
	require.NoError(t, db.DeleteJob("nonexistent"))
}
// TestListJobsExtended seeds four jobs, one per status, and checks how many
// ListJobs returns for different status filters and limits. An empty status
// string is expected to match every job.
func TestListJobsExtended(t *testing.T) {
	t.Parallel()

	db := setupTestDB(t)

	// One job per status, each with a distinct priority.
	seeded := []*storage.Job{
		{ID: "list-ext-1", Status: "pending", Priority: 1, JobName: "job-a"},
		{ID: "list-ext-2", Status: "running", Priority: 2, JobName: "job-b"},
		{ID: "list-ext-3", Status: "completed", Priority: 3, JobName: "job-c"},
		{ID: "list-ext-4", Status: "failed", Priority: 4, JobName: "job-d"},
	}
	for i := range seeded {
		require.NoError(t, db.CreateJob(seeded[i]))
	}

	queries := []struct {
		name    string
		status  string
		limit   int
		wantLen int
		wantErr bool
	}{
		{name: "all jobs", status: "", limit: 10, wantLen: 4, wantErr: false},
		{name: "pending only", status: "pending", limit: 10, wantLen: 1, wantErr: false},
		{name: "running only", status: "running", limit: 10, wantLen: 1, wantErr: false},
		{name: "completed only", status: "completed", limit: 10, wantLen: 1, wantErr: false},
		{name: "failed only", status: "failed", limit: 10, wantLen: 1, wantErr: false},
		{name: "with limit 2", status: "", limit: 2, wantLen: 2, wantErr: false},
		{name: "with limit 1", status: "", limit: 1, wantLen: 1, wantErr: false},
		{name: "nonexistent status", status: "cancelled", limit: 10, wantLen: 0, wantErr: false},
	}

	for i := range queries {
		q := queries[i]
		t.Run(q.name, func(t *testing.T) {
			listed, listErr := db.ListJobs(q.status, q.limit)
			if !q.wantErr {
				require.NoError(t, listErr)
				assert.Len(t, listed, q.wantLen)
				return
			}
			require.Error(t, listErr)
		})
	}
}
// TestDeleteJobsByPrefix seeds three jobs, two with IDs starting "prefix-a-"
// and one starting "prefix-b-". It deletes by the "prefix-a-" pattern and
// expects only the "prefix-b-1" job to remain.
func TestDeleteJobsByPrefix(t *testing.T) {
	t.Parallel()

	db := setupTestDB(t)

	// Create jobs with prefixes
	jobs := []*storage.Job{
		{ID: "prefix-a-1", Status: "pending"},
		{ID: "prefix-a-2", Status: "pending"},
		{ID: "prefix-b-1", Status: "pending"},
	}
	for _, j := range jobs {
		require.NoError(t, db.CreateJob(j))
	}

	// Delete by prefix.
	// NOTE(review): the argument carries a trailing "%", presumably an SQL
	// LIKE wildcard expected by DeleteJobsByPrefix - confirm against the
	// implementation.
	err := db.DeleteJobsByPrefix("prefix-a-%")
	require.NoError(t, err)

	// Verify
	remaining, err := db.ListJobs("", 10)
	require.NoError(t, err)

	// require, not assert: a wrong length must stop the test here, because
	// indexing remaining[0] on an empty slice would panic.
	require.Len(t, remaining, 1)
	assert.Equal(t, "prefix-b-1", remaining[0].ID)
}