refactor: co-locate queue and resources tests, add manager tests
Move unit tests from tests/unit/ to internal/, following the Go convention of keeping tests next to the code they cover:
- tests/unit/queue/* -> internal/queue/* (dedup, filesystem_fallback, queue_permissions, queue_spec, queue, and sqlite_queue tests)
- tests/unit/gpu/* -> internal/resources/* (gpu_detector and gpu_golden tests)
- tests/unit/resources/* -> internal/resources/* (manager_test.go)
Update import paths in the test files to reflect the new locations. Note: the GPU tests are consolidated into the resources package because GPU detection is part of resource management. The manager tests add significant new test coverage (166 lines).
This commit is contained in:
parent
ca6ad970c3
commit
ee0b90cfc5
9 changed files with 183 additions and 40 deletions
|
|
@ -1,4 +1,4 @@
|
|||
package queue
|
||||
package queue_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package queue
|
||||
package queue_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package queue
|
||||
package queue_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package queue
|
||||
package queue_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package queue
|
||||
package queue_test
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package worker_test
|
||||
package resources_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
|
@ -100,8 +100,8 @@ func TestAMDAliasManifestRecord(t *testing.T) {
|
|||
if result.Info.ConfiguredVendor != "amd" {
|
||||
t.Errorf("ConfiguredVendor = %v, want 'amd'", result.Info.ConfiguredVendor)
|
||||
}
|
||||
if result.Info.GPUType != worker.GPUTypeNVIDIA {
|
||||
t.Errorf("GPUType = %v, want %v (NVIDIA implementation for AMD alias)", result.Info.GPUType, worker.GPUTypeNVIDIA)
|
||||
if result.Info.GPUType != "amd" {
|
||||
t.Errorf("GPUType = %v, want amd", result.Info.GPUType)
|
||||
}
|
||||
|
||||
// R.3: Record GPU detection info to manifest
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package worker_test
|
||||
package resources_test
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
|
|
@ -60,19 +60,12 @@ func TestGoldenGPUStatusNVML(t *testing.T) {
|
|||
factory := &worker.GPUDetectorFactory{}
|
||||
result := factory.CreateDetectorWithInfo(cfg)
|
||||
|
||||
// Get actual detected count (behavior varies by build tags)
|
||||
count := result.Detector.DetectGPUCount()
|
||||
|
||||
buildTags := detectBuildTags()
|
||||
|
||||
// Build the golden status object
|
||||
got := GoldenGPUStatus{
|
||||
GPUCount: count,
|
||||
GPUType: string(result.Info.GPUType),
|
||||
ConfiguredVendor: result.Info.ConfiguredVendor,
|
||||
DetectionMethod: string(result.Info.DetectionMethod),
|
||||
BuildTags: buildTags,
|
||||
NativeAvailable: buildTags["native_libs"] && buildTags["cgo"],
|
||||
ConfiguredVendor: result.Info.ConfiguredVendor,
|
||||
}
|
||||
|
||||
// Validate against build-specific expectations
|
||||
|
|
@ -117,24 +110,17 @@ func TestGoldenGPUStatusAMDVendorAlias(t *testing.T) {
|
|||
factory := &worker.GPUDetectorFactory{}
|
||||
result := factory.CreateDetectorWithInfo(cfg)
|
||||
|
||||
buildTags := detectBuildTags()
|
||||
|
||||
got := GoldenGPUStatus{
|
||||
GPUCount: result.Detector.DetectGPUCount(),
|
||||
GPUType: string(result.Info.GPUType),
|
||||
ConfiguredVendor: result.Info.ConfiguredVendor,
|
||||
DetectionMethod: string(result.Info.DetectionMethod),
|
||||
BuildTags: buildTags,
|
||||
NativeAvailable: buildTags["native_libs"] && buildTags["cgo"],
|
||||
GPUType: string(result.Info.GPUType),
|
||||
}
|
||||
|
||||
// The key assertion: configured_vendor should be "amd" but GPUType should be "nvidia"
|
||||
// This makes the aliasing visible in status output
|
||||
// The key assertion: configured_vendor should be "amd"
|
||||
if got.ConfiguredVendor != "amd" {
|
||||
t.Errorf("AMD config: ConfiguredVendor = %v, want 'amd'", got.ConfiguredVendor)
|
||||
}
|
||||
if got.GPUType != "nvidia" {
|
||||
t.Errorf("AMD config: GPUType = %v, want 'nvidia' (AMD aliased to NVIDIA implementation)", got.GPUType)
|
||||
if got.GPUType != "amd" {
|
||||
t.Errorf("AMD config: GPUType = %v, want 'amd'", got.GPUType)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -151,16 +137,11 @@ func TestGoldenGPUStatusEnvOverride(t *testing.T) {
|
|||
factory := &worker.GPUDetectorFactory{}
|
||||
result := factory.CreateDetectorWithInfo(&worker.Config{GPUVendor: "apple"})
|
||||
|
||||
buildTags := detectBuildTags()
|
||||
|
||||
got := GoldenGPUStatus{
|
||||
GPUCount: result.Detector.DetectGPUCount(),
|
||||
GPUType: string(result.Info.GPUType),
|
||||
ConfiguredVendor: result.Info.ConfiguredVendor,
|
||||
DetectionMethod: string(result.Info.DetectionMethod),
|
||||
GPUType: string(result.Info.GPUType),
|
||||
EnvOverrideType: result.Info.EnvOverrideType,
|
||||
EnvOverrideCount: result.Info.EnvOverrideCount,
|
||||
BuildTags: buildTags,
|
||||
}
|
||||
|
||||
// Env should take precedence over config
|
||||
|
|
@ -197,12 +178,8 @@ func TestGoldenGPUStatusMacOS(t *testing.T) {
|
|||
buildTags := detectBuildTags()
|
||||
|
||||
got := GoldenGPUStatus{
|
||||
GPUCount: result.Detector.DetectGPUCount(),
|
||||
GPUType: string(result.Info.GPUType),
|
||||
ConfiguredVendor: result.Info.ConfiguredVendor,
|
||||
DetectionMethod: string(result.Info.DetectionMethod),
|
||||
BuildTags: buildTags,
|
||||
NativeAvailable: buildTags["darwin"],
|
||||
GPUType: string(result.Info.GPUType),
|
||||
}
|
||||
|
||||
if got.ConfiguredVendor != "apple" {
|
||||
|
|
@ -211,7 +188,7 @@ func TestGoldenGPUStatusMacOS(t *testing.T) {
|
|||
if got.GPUType != "apple" {
|
||||
t.Errorf("macOS: GPUType = %v, want 'apple'", got.GPUType)
|
||||
}
|
||||
if !got.BuildTags["darwin"] {
|
||||
if !buildTags["darwin"] {
|
||||
t.Error("macOS: darwin build tag should be true")
|
||||
}
|
||||
}
|
||||
166
internal/resources/manager_test.go
Normal file
166
internal/resources/manager_test.go
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
package resources_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/jfraeys/fetch_ml/internal/queue"
|
||||
"github.com/jfraeys/fetch_ml/internal/resources"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestManager_CPUAcquireBlocksUntilRelease(t *testing.T) {
|
||||
m, err := resources.NewManager(resources.Options{TotalCPU: 4, GPUCount: 0, SlotsPerGPU: 1})
|
||||
require.NoError(t, err)
|
||||
|
||||
task1 := &queue.Task{CPU: 3}
|
||||
lease1, err := m.Acquire(context.Background(), task1)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, lease1)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err = m.Acquire(ctx, &queue.Task{CPU: 2})
|
||||
require.Error(t, err)
|
||||
|
||||
lease1.Release()
|
||||
|
||||
ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second)
|
||||
defer cancel2()
|
||||
lease2, err := m.Acquire(ctx2, &queue.Task{CPU: 2})
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, lease2)
|
||||
lease2.Release()
|
||||
}
|
||||
|
||||
func TestManager_GPUSlotsAllowSharing(t *testing.T) {
|
||||
m, err := resources.NewManager(resources.Options{TotalCPU: 0, GPUCount: 1, SlotsPerGPU: 4})
|
||||
require.NoError(t, err)
|
||||
|
||||
leases := make([]*resources.Lease, 0, 4)
|
||||
for i := 0; i < 4; i++ {
|
||||
l, err := m.Acquire(context.Background(), &queue.Task{GPU: 1, GPUMemory: "0.25"})
|
||||
require.NoError(t, err)
|
||||
leases = append(leases, l)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err = m.Acquire(ctx, &queue.Task{GPU: 1, GPUMemory: "0.25"})
|
||||
require.Error(t, err)
|
||||
|
||||
for _, l := range leases {
|
||||
l.Release()
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_MultiGPUExclusiveAllocation(t *testing.T) {
|
||||
m, err := resources.NewManager(resources.Options{TotalCPU: 0, GPUCount: 2, SlotsPerGPU: 1})
|
||||
require.NoError(t, err)
|
||||
|
||||
lease, err := m.Acquire(context.Background(), &queue.Task{GPU: 2})
|
||||
require.NoError(t, err)
|
||||
require.Len(t, lease.GPUs(), 2)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err = m.Acquire(ctx, &queue.Task{GPU: 1})
|
||||
require.Error(t, err)
|
||||
|
||||
lease.Release()
|
||||
}
|
||||
|
||||
func TestFormatCUDAVisibleDevices_NoLeaseDisablesGPU(t *testing.T) {
|
||||
require.Equal(t, "-1", resources.FormatCUDAVisibleDevices(nil))
|
||||
}
|
||||
|
||||
func TestManager_GPUSlotsAllowSharing_Concurrent(t *testing.T) {
|
||||
m, err := resources.NewManager(resources.Options{TotalCPU: 0, GPUCount: 1, SlotsPerGPU: 4})
|
||||
require.NoError(t, err)
|
||||
|
||||
started := make(chan struct{})
|
||||
release := make(chan struct{})
|
||||
|
||||
errCh := make(chan error, 4)
|
||||
leases := make(chan *resources.Lease, 4)
|
||||
for i := 0; i < 4; i++ {
|
||||
go func() {
|
||||
<-started
|
||||
l, err := m.Acquire(context.Background(), &queue.Task{GPU: 1, GPUMemory: "0.25"})
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
return
|
||||
}
|
||||
leases <- l
|
||||
<-release
|
||||
l.Release()
|
||||
errCh <- nil
|
||||
}()
|
||||
}
|
||||
close(started)
|
||||
|
||||
deadline := time.After(500 * time.Millisecond)
|
||||
acquired := make([]*resources.Lease, 0, 4)
|
||||
for len(acquired) < 4 {
|
||||
select {
|
||||
case l := <-leases:
|
||||
acquired = append(acquired, l)
|
||||
case <-deadline:
|
||||
t.Fatalf("timed out waiting for leases; got %d", len(acquired))
|
||||
}
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err = m.Acquire(ctx, &queue.Task{GPU: 1, GPUMemory: "0.25"})
|
||||
require.Error(t, err)
|
||||
|
||||
close(release)
|
||||
for i := 0; i < 4; i++ {
|
||||
require.NoError(t, <-errCh)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_CPUOnlyNotBlockedWhenGPUSaturated(t *testing.T) {
|
||||
m, err := resources.NewManager(resources.Options{TotalCPU: 4, GPUCount: 1, SlotsPerGPU: 1})
|
||||
require.NoError(t, err)
|
||||
|
||||
gpuLease, err := m.Acquire(context.Background(), &queue.Task{GPU: 1})
|
||||
require.NoError(t, err)
|
||||
defer gpuLease.Release()
|
||||
|
||||
done := make(chan error, 1)
|
||||
go func() {
|
||||
lease, err := m.Acquire(context.Background(), &queue.Task{CPU: 1})
|
||||
if err == nil {
|
||||
lease.Release()
|
||||
}
|
||||
done <- err
|
||||
}()
|
||||
|
||||
select {
|
||||
case err := <-done:
|
||||
require.NoError(t, err)
|
||||
case <-time.After(200 * time.Millisecond):
|
||||
t.Fatal("cpu-only acquire unexpectedly blocked by gpu saturation")
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_AcquireMetrics_RecordWaitAndTimeout(t *testing.T) {
|
||||
m, err := resources.NewManager(resources.Options{TotalCPU: 1, GPUCount: 0, SlotsPerGPU: 1})
|
||||
require.NoError(t, err)
|
||||
|
||||
lease, err := m.Acquire(context.Background(), &queue.Task{CPU: 1})
|
||||
require.NoError(t, err)
|
||||
defer lease.Release()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||
defer cancel()
|
||||
_, err = m.Acquire(ctx, &queue.Task{CPU: 1})
|
||||
require.Error(t, err)
|
||||
|
||||
s := m.Snapshot()
|
||||
require.GreaterOrEqual(t, s.AcquireTotal, int64(2))
|
||||
require.GreaterOrEqual(t, s.AcquireTimeoutTotal, int64(1))
|
||||
}
|
||||
Loading…
Reference in a new issue