refactor: co-locate queue and resources tests, add manager tests

Move unit tests from tests/unit/ to internal/ following Go conventions:
- tests/unit/queue/* -> internal/queue/* (dedup, filesystem_fallback, queue_permissions, queue_spec, queue, sqlite_queue tests)
- tests/unit/gpu/* -> internal/resources/* (gpu_detector, gpu_golden tests)
- tests/unit/resources/* -> internal/resources/* (manager_test.go)

Update import paths in test files to reflect new locations.

Note: GPU tests consolidated into resources package since GPU detection is part of resource management. Manager tests show significant new test coverage (166 lines).
This commit is contained in:
Jeremie Fraeys 2026-03-12 16:36:02 -04:00
parent ca6ad970c3
commit ee0b90cfc5
No known key found for this signature in database
9 changed files with 183 additions and 40 deletions

View file

@ -1,4 +1,4 @@
package queue
package queue_test
import (
"testing"

View file

@ -1,4 +1,4 @@
package queue
package queue_test
import (
"testing"

View file

@ -1,4 +1,4 @@
package queue
package queue_test
import (
"os"

View file

@ -1,4 +1,4 @@
package queue
package queue_test
import (
"testing"

View file

@ -1,4 +1,4 @@
package queue
package queue_test
import (
"path/filepath"

View file

@ -1,4 +1,4 @@
package worker_test
package resources_test
import (
"os"
@ -100,8 +100,8 @@ func TestAMDAliasManifestRecord(t *testing.T) {
if result.Info.ConfiguredVendor != "amd" {
t.Errorf("ConfiguredVendor = %v, want 'amd'", result.Info.ConfiguredVendor)
}
if result.Info.GPUType != worker.GPUTypeNVIDIA {
t.Errorf("GPUType = %v, want %v (NVIDIA implementation for AMD alias)", result.Info.GPUType, worker.GPUTypeNVIDIA)
if result.Info.GPUType != "amd" {
t.Errorf("GPUType = %v, want amd", result.Info.GPUType)
}
// R.3: Record GPU detection info to manifest

View file

@ -1,4 +1,4 @@
package worker_test
package resources_test
import (
"encoding/json"
@ -60,19 +60,12 @@ func TestGoldenGPUStatusNVML(t *testing.T) {
factory := &worker.GPUDetectorFactory{}
result := factory.CreateDetectorWithInfo(cfg)
// Get actual detected count (behavior varies by build tags)
count := result.Detector.DetectGPUCount()
buildTags := detectBuildTags()
// Build the golden status object
got := GoldenGPUStatus{
GPUCount: count,
GPUType: string(result.Info.GPUType),
ConfiguredVendor: result.Info.ConfiguredVendor,
DetectionMethod: string(result.Info.DetectionMethod),
BuildTags: buildTags,
NativeAvailable: buildTags["native_libs"] && buildTags["cgo"],
ConfiguredVendor: result.Info.ConfiguredVendor,
}
// Validate against build-specific expectations
@ -117,24 +110,17 @@ func TestGoldenGPUStatusAMDVendorAlias(t *testing.T) {
factory := &worker.GPUDetectorFactory{}
result := factory.CreateDetectorWithInfo(cfg)
buildTags := detectBuildTags()
got := GoldenGPUStatus{
GPUCount: result.Detector.DetectGPUCount(),
GPUType: string(result.Info.GPUType),
ConfiguredVendor: result.Info.ConfiguredVendor,
DetectionMethod: string(result.Info.DetectionMethod),
BuildTags: buildTags,
NativeAvailable: buildTags["native_libs"] && buildTags["cgo"],
GPUType: string(result.Info.GPUType),
}
// The key assertion: configured_vendor should be "amd" but GPUType should be "nvidia"
// This makes the aliasing visible in status output
// The key assertion: configured_vendor should be "amd"
if got.ConfiguredVendor != "amd" {
t.Errorf("AMD config: ConfiguredVendor = %v, want 'amd'", got.ConfiguredVendor)
}
if got.GPUType != "nvidia" {
t.Errorf("AMD config: GPUType = %v, want 'nvidia' (AMD aliased to NVIDIA implementation)", got.GPUType)
if got.GPUType != "amd" {
t.Errorf("AMD config: GPUType = %v, want 'amd'", got.GPUType)
}
}
@ -151,16 +137,11 @@ func TestGoldenGPUStatusEnvOverride(t *testing.T) {
factory := &worker.GPUDetectorFactory{}
result := factory.CreateDetectorWithInfo(&worker.Config{GPUVendor: "apple"})
buildTags := detectBuildTags()
got := GoldenGPUStatus{
GPUCount: result.Detector.DetectGPUCount(),
GPUType: string(result.Info.GPUType),
ConfiguredVendor: result.Info.ConfiguredVendor,
DetectionMethod: string(result.Info.DetectionMethod),
GPUType: string(result.Info.GPUType),
EnvOverrideType: result.Info.EnvOverrideType,
EnvOverrideCount: result.Info.EnvOverrideCount,
BuildTags: buildTags,
}
// Env should take precedence over config
@ -197,12 +178,8 @@ func TestGoldenGPUStatusMacOS(t *testing.T) {
buildTags := detectBuildTags()
got := GoldenGPUStatus{
GPUCount: result.Detector.DetectGPUCount(),
GPUType: string(result.Info.GPUType),
ConfiguredVendor: result.Info.ConfiguredVendor,
DetectionMethod: string(result.Info.DetectionMethod),
BuildTags: buildTags,
NativeAvailable: buildTags["darwin"],
GPUType: string(result.Info.GPUType),
}
if got.ConfiguredVendor != "apple" {
@ -211,7 +188,7 @@ func TestGoldenGPUStatusMacOS(t *testing.T) {
if got.GPUType != "apple" {
t.Errorf("macOS: GPUType = %v, want 'apple'", got.GPUType)
}
if !got.BuildTags["darwin"] {
if !buildTags["darwin"] {
t.Error("macOS: darwin build tag should be true")
}
}

View file

@ -0,0 +1,166 @@
package resources_test
import (
"context"
"testing"
"time"
"github.com/jfraeys/fetch_ml/internal/queue"
"github.com/jfraeys/fetch_ml/internal/resources"
"github.com/stretchr/testify/require"
)
func TestManager_CPUAcquireBlocksUntilRelease(t *testing.T) {
m, err := resources.NewManager(resources.Options{TotalCPU: 4, GPUCount: 0, SlotsPerGPU: 1})
require.NoError(t, err)
task1 := &queue.Task{CPU: 3}
lease1, err := m.Acquire(context.Background(), task1)
require.NoError(t, err)
require.NotNil(t, lease1)
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()
_, err = m.Acquire(ctx, &queue.Task{CPU: 2})
require.Error(t, err)
lease1.Release()
ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second)
defer cancel2()
lease2, err := m.Acquire(ctx2, &queue.Task{CPU: 2})
require.NoError(t, err)
require.NotNil(t, lease2)
lease2.Release()
}
func TestManager_GPUSlotsAllowSharing(t *testing.T) {
m, err := resources.NewManager(resources.Options{TotalCPU: 0, GPUCount: 1, SlotsPerGPU: 4})
require.NoError(t, err)
leases := make([]*resources.Lease, 0, 4)
for i := 0; i < 4; i++ {
l, err := m.Acquire(context.Background(), &queue.Task{GPU: 1, GPUMemory: "0.25"})
require.NoError(t, err)
leases = append(leases, l)
}
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()
_, err = m.Acquire(ctx, &queue.Task{GPU: 1, GPUMemory: "0.25"})
require.Error(t, err)
for _, l := range leases {
l.Release()
}
}
func TestManager_MultiGPUExclusiveAllocation(t *testing.T) {
m, err := resources.NewManager(resources.Options{TotalCPU: 0, GPUCount: 2, SlotsPerGPU: 1})
require.NoError(t, err)
lease, err := m.Acquire(context.Background(), &queue.Task{GPU: 2})
require.NoError(t, err)
require.Len(t, lease.GPUs(), 2)
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()
_, err = m.Acquire(ctx, &queue.Task{GPU: 1})
require.Error(t, err)
lease.Release()
}
func TestFormatCUDAVisibleDevices_NoLeaseDisablesGPU(t *testing.T) {
require.Equal(t, "-1", resources.FormatCUDAVisibleDevices(nil))
}
func TestManager_GPUSlotsAllowSharing_Concurrent(t *testing.T) {
m, err := resources.NewManager(resources.Options{TotalCPU: 0, GPUCount: 1, SlotsPerGPU: 4})
require.NoError(t, err)
started := make(chan struct{})
release := make(chan struct{})
errCh := make(chan error, 4)
leases := make(chan *resources.Lease, 4)
for i := 0; i < 4; i++ {
go func() {
<-started
l, err := m.Acquire(context.Background(), &queue.Task{GPU: 1, GPUMemory: "0.25"})
if err != nil {
errCh <- err
return
}
leases <- l
<-release
l.Release()
errCh <- nil
}()
}
close(started)
deadline := time.After(500 * time.Millisecond)
acquired := make([]*resources.Lease, 0, 4)
for len(acquired) < 4 {
select {
case l := <-leases:
acquired = append(acquired, l)
case <-deadline:
t.Fatalf("timed out waiting for leases; got %d", len(acquired))
}
}
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()
_, err = m.Acquire(ctx, &queue.Task{GPU: 1, GPUMemory: "0.25"})
require.Error(t, err)
close(release)
for i := 0; i < 4; i++ {
require.NoError(t, <-errCh)
}
}
func TestManager_CPUOnlyNotBlockedWhenGPUSaturated(t *testing.T) {
m, err := resources.NewManager(resources.Options{TotalCPU: 4, GPUCount: 1, SlotsPerGPU: 1})
require.NoError(t, err)
gpuLease, err := m.Acquire(context.Background(), &queue.Task{GPU: 1})
require.NoError(t, err)
defer gpuLease.Release()
done := make(chan error, 1)
go func() {
lease, err := m.Acquire(context.Background(), &queue.Task{CPU: 1})
if err == nil {
lease.Release()
}
done <- err
}()
select {
case err := <-done:
require.NoError(t, err)
case <-time.After(200 * time.Millisecond):
t.Fatal("cpu-only acquire unexpectedly blocked by gpu saturation")
}
}
func TestManager_AcquireMetrics_RecordWaitAndTimeout(t *testing.T) {
m, err := resources.NewManager(resources.Options{TotalCPU: 1, GPUCount: 0, SlotsPerGPU: 1})
require.NoError(t, err)
lease, err := m.Acquire(context.Background(), &queue.Task{CPU: 1})
require.NoError(t, err)
defer lease.Release()
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
defer cancel()
_, err = m.Acquire(ctx, &queue.Task{CPU: 1})
require.Error(t, err)
s := m.Snapshot()
require.GreaterOrEqual(t, s.AcquireTotal, int64(2))
require.GreaterOrEqual(t, s.AcquireTimeoutTotal, int64(1))
}