package scheduler_test

import (
	"testing"

	"github.com/jfraeys/fetch_ml/internal/scheduler"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestPluginQuotaManager_CheckQuota_Disabled checks that CheckQuota returns no
// error when the config has Enabled set to false, even for a request that is
// far above the configured TotalGPUs.
func TestPluginQuotaManager_CheckQuota_Disabled(t *testing.T) {
	// When quota is disabled, all jobs should pass
	config := scheduler.PluginQuotaConfig{
		Enabled:   false,
		TotalGPUs: 1, // Set a low limit that would fail if enabled
	}
	m := scheduler.NewPluginQuotaManager(config)

	err := m.CheckQuota("user1", "plugin1", 100)
	assert.NoError(t, err)
}

// TestPluginQuotaManager_CheckQuota_GlobalLimit records usage up to TotalGPUs
// across two users and expects the next request to be rejected with a
// "global GPU limit exceeded" error.
func TestPluginQuotaManager_CheckQuota_GlobalLimit(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:   true,
		TotalGPUs: 4,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// First job should succeed
	err := m.CheckQuota("user1", "plugin1", 2)
	require.NoError(t, err)

	// Record the usage
	m.RecordUsage("user1", "plugin1", 2)

	// Second job should succeed (2+2=4, within limit)
	err = m.CheckQuota("user2", "plugin2", 2)
	require.NoError(t, err)
	m.RecordUsage("user2", "plugin2", 2)

	// Third job should fail (would exceed global limit).
	// require (not assert) so a nil error stops the test instead of
	// panicking on err.Error() below.
	err = m.CheckQuota("user3", "plugin3", 1)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "global GPU limit exceeded")
}

// TestPluginQuotaManager_CheckQuota_PerUserGPULimit fills one user's GPU
// allowance (PerUserGPUs) and expects a further request from that user to be
// rejected while a different user is still accepted.
func TestPluginQuotaManager_CheckQuota_PerUserGPULimit(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:     true,
		TotalGPUs:   10,
		PerUserGPUs: 3,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// User1: first job should succeed
	err := m.CheckQuota("user1", "plugin1", 2)
	require.NoError(t, err)
	m.RecordUsage("user1", "plugin1", 2)

	// User1: second job should succeed (2+1=3, at limit)
	err = m.CheckQuota("user1", "plugin2", 1)
	require.NoError(t, err)
	m.RecordUsage("user1", "plugin2", 1)

	// User1: third job should fail (would exceed per-user limit)
	err = m.CheckQuota("user1", "plugin3", 1)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "user user1 GPU limit exceeded")

	// User2: job should succeed (different user)
	err = m.CheckQuota("user2", "plugin1", 3)
	assert.NoError(t, err)
}

// TestPluginQuotaManager_CheckQuota_PerUserServiceLimit records two services
// for one user with PerUserServices set to 2 and expects a third request from
// that user to be rejected with a service-limit error.
func TestPluginQuotaManager_CheckQuota_PerUserServiceLimit(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:         true,
		TotalGPUs:       10,
		PerUserGPUs:     10,
		PerUserServices: 2,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// User1: first service should succeed
	err := m.CheckQuota("user1", "plugin1", 1)
	require.NoError(t, err)
	m.RecordUsage("user1", "plugin1", 1)

	// User1: second service should succeed
	err = m.CheckQuota("user1", "plugin2", 1)
	require.NoError(t, err)
	m.RecordUsage("user1", "plugin2", 1)

	// User1: third service should fail (would exceed service count limit)
	err = m.CheckQuota("user1", "plugin3", 1)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "user user1 service limit exceeded")
}

// TestPluginQuotaManager_CheckQuota_UserOverride configures a UserOverrides
// entry with a higher MaxGPUs than PerUserGPUs and expects the overridden user
// to be accepted for a request size that is rejected for a regular user.
func TestPluginQuotaManager_CheckQuota_UserOverride(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:         true,
		TotalGPUs:       10,
		PerUserGPUs:     2,
		PerUserServices: 2,
		UserOverrides: map[string]scheduler.UserLimit{
			"vip-user": {
				MaxGPUs:     5,
				MaxServices: 10,
			},
		},
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Regular user: limited by default
	err := m.CheckQuota("regular", "plugin1", 3)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "regular GPU limit exceeded")

	// VIP user: has higher limit
	err = m.CheckQuota("vip-user", "plugin1", 4)
	require.NoError(t, err)
	m.RecordUsage("vip-user", "plugin1", 4)

	// VIP user: still within limit
	err = m.CheckQuota("vip-user", "plugin2", 1)
	assert.NoError(t, err)
}

// TestPluginQuotaManager_CheckQuota_PluginSpecificLimit sets per-plugin MaxGPUs
// values and expects a request that would push "jupyter" past its own limit to
// be rejected, while a "vllm" request of larger size is accepted.
func TestPluginQuotaManager_CheckQuota_PluginSpecificLimit(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:     true,
		TotalGPUs:   10,
		PerUserGPUs: 10,
		PerPluginLimits: map[string]scheduler.PluginLimit{
			"jupyter": {
				MaxGPUs:     3,
				MaxServices: 2,
			},
			"vllm": {
				MaxGPUs:     8,
				MaxServices: 4,
			},
		},
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Jupyter: within plugin GPU limit
	err := m.CheckQuota("user1", "jupyter", 2)
	require.NoError(t, err)
	m.RecordUsage("user1", "jupyter", 2)

	// Jupyter: exceed plugin GPU limit (but within global and user limits)
	err = m.CheckQuota("user2", "jupyter", 2)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "plugin jupyter GPU limit exceeded")

	// vLLM: within its higher limit
	err = m.CheckQuota("user1", "vllm", 4)
	assert.NoError(t, err)
}

// TestPluginQuotaManager_CheckQuota_PluginServiceLimit records two "jupyter"
// services from different users with the plugin's MaxServices set to 2 and
// expects a third request, from yet another user, to be rejected.
func TestPluginQuotaManager_CheckQuota_PluginServiceLimit(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:         true,
		TotalGPUs:       10,
		PerUserGPUs:     10,
		PerUserServices: 10,
		PerPluginLimits: map[string]scheduler.PluginLimit{
			"jupyter": {
				MaxGPUs:     10,
				MaxServices: 2, // Only 2 jupyter services total
			},
		},
	}
	m := scheduler.NewPluginQuotaManager(config)

	// First jupyter service
	err := m.CheckQuota("user1", "jupyter", 1)
	require.NoError(t, err)
	m.RecordUsage("user1", "jupyter", 1)

	// Second jupyter service (different user)
	err = m.CheckQuota("user2", "jupyter", 1)
	require.NoError(t, err)
	m.RecordUsage("user2", "jupyter", 1)

	// Third jupyter service should fail (plugin service limit reached)
	err = m.CheckQuota("user3", "jupyter", 1)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "plugin jupyter service limit exceeded")
}

// TestPluginQuotaManager_CheckQuota_AllowedPlugins gives one user an
// AllowedPlugins list and expects that user to be rejected for a plugin outside
// the list, while a user without an override is accepted for the same plugin.
func TestPluginQuotaManager_CheckQuota_AllowedPlugins(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:     true,
		TotalGPUs:   10,
		PerUserGPUs: 10,
		UserOverrides: map[string]scheduler.UserLimit{
			"restricted-user": {
				MaxGPUs:        5,
				MaxServices:    5,
				AllowedPlugins: []string{"jupyter"},
			},
		},
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Restricted user can use allowed plugin
	err := m.CheckQuota("restricted-user", "jupyter", 2)
	assert.NoError(t, err)

	// Restricted user cannot use other plugins
	err = m.CheckQuota("restricted-user", "vllm", 2)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "not allowed to use plugin vllm")

	// Regular user can use any plugin
	err = m.CheckQuota("regular-user", "vllm", 2)
	assert.NoError(t, err)
}

// TestPluginQuotaManager_RecordAndReleaseUsage records usage for two users and
// two plugins, then releases one entry, checking the values reported by
// GetUsage and GetGlobalUsage before and after the release.
func TestPluginQuotaManager_RecordAndReleaseUsage(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:     true,
		TotalGPUs:   10,
		PerUserGPUs: 5,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Record usage
	m.RecordUsage("user1", "jupyter", 2)
	m.RecordUsage("user1", "vllm", 1)
	m.RecordUsage("user2", "jupyter", 3)

	// Check usage tracking
	usage, totalGPUs := m.GetUsage("user1")
	assert.Equal(t, 2, usage["jupyter"].GPUs)
	assert.Equal(t, 1, usage["jupyter"].Services)
	assert.Equal(t, 1, usage["vllm"].GPUs)
	assert.Equal(t, 1, usage["vllm"].Services)
	assert.Equal(t, 3, totalGPUs)

	// Check global usage
	globalGPUs, pluginTotals := m.GetGlobalUsage()
	assert.Equal(t, 6, globalGPUs)
	assert.Equal(t, 5, pluginTotals["jupyter"]) // 2+3
	assert.Equal(t, 1, pluginTotals["vllm"])

	// Release usage
	m.ReleaseUsage("user1", "jupyter", 2)

	// Verify release
	usage, totalGPUs = m.GetUsage("user1")
	assert.Equal(t, 0, usage["jupyter"].GPUs)
	assert.Equal(t, 0, usage["jupyter"].Services)
	// user1 still has vllm
	assert.Equal(t, 1, usage["vllm"].GPUs)
	// only vllm remains for user1
	assert.Equal(t, 1, totalGPUs)

	// Check global usage after release
	globalGPUs, pluginTotals = m.GetGlobalUsage()
	assert.Equal(t, 4, globalGPUs)
	assert.Equal(t, 3, pluginTotals["jupyter"]) // 3 from user2
	assert.Equal(t, 1, pluginTotals["vllm"])
}

// TestPluginQuotaManager_RecordUsage_Disabled checks that RecordUsage on a
// manager with Enabled set to false leaves GetUsage reporting nothing for the
// user.
func TestPluginQuotaManager_RecordUsage_Disabled(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:   false,
		TotalGPUs: 10,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Recording usage when disabled should not crash
	m.RecordUsage("user1", "plugin1", 5)

	// Usage should be empty (not tracked)
	usage, totalGPUs := m.GetUsage("user1")
	assert.Empty(t, usage)
	assert.Equal(t, 0, totalGPUs)
}

// TestPluginQuotaManager_ReleaseUsage_NonExistent releases usage that was never
// recorded and expects the global GPU count to stay at zero.
func TestPluginQuotaManager_ReleaseUsage_NonExistent(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:   true,
		TotalGPUs: 10,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Releasing non-existent usage should not crash or go negative
	m.ReleaseUsage("nonexistent", "plugin1", 5)

	// Global usage should remain 0
	globalGPUs, _ := m.GetGlobalUsage()
	assert.Equal(t, 0, globalGPUs)
}

// TestPluginQuotaManager_CheckQuota_AnonymousUser passes an empty user ID and
// expects the resulting limit error to name the user "anonymous".
func TestPluginQuotaManager_CheckQuota_AnonymousUser(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:         true,
		TotalGPUs:       10,
		PerUserGPUs:     2,
		PerUserServices: 2,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Empty userID should be treated as "anonymous"
	err := m.CheckQuota("", "plugin1", 2)
	require.NoError(t, err)
	m.RecordUsage("", "plugin1", 2)

	// Second request from anonymous should fail (at limit)
	err = m.CheckQuota("", "plugin1", 1)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "user anonymous GPU limit exceeded")
}

// TestPluginQuotaManager_CheckQuota_DefaultPlugin passes an empty plugin name
// and expects the limits configured under the "default" key to apply, with the
// error message naming the plugin "default".
func TestPluginQuotaManager_CheckQuota_DefaultPlugin(t *testing.T) {
	config := scheduler.PluginQuotaConfig{
		Enabled:         true,
		TotalGPUs:       10,
		PerUserGPUs:     5,
		PerUserServices: 5,
		PerPluginLimits: map[string]scheduler.PluginLimit{
			"default": {
				MaxGPUs:     2,
				MaxServices: 2,
			},
		},
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Empty plugin name should be treated as "default"
	err := m.CheckQuota("user1", "", 1)
	require.NoError(t, err)
	m.RecordUsage("user1", "", 1)

	// Exceed default plugin limit
	err = m.CheckQuota("user2", "", 2)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "plugin default GPU limit exceeded")
}

// TestPluginQuotaManager_ConcurrentAccess calls RecordUsage from ten goroutines
// split evenly between two users and checks that the global and per-user
// totals add up once every goroutine has finished.
func TestPluginQuotaManager_ConcurrentAccess(t *testing.T) {
	const workers = 10

	config := scheduler.PluginQuotaConfig{
		Enabled:         true,
		TotalGPUs:       100,
		PerUserGPUs:     50,
		PerUserServices: 50,
	}
	m := scheduler.NewPluginQuotaManager(config)

	// Concurrently record usage from multiple goroutines. The channel is
	// buffered to the worker count so no sender blocks on completion.
	done := make(chan struct{}, workers)
	for i := 0; i < workers; i++ {
		go func(idx int) {
			// Even indices go to user1, odd indices to user2.
			user := "user2"
			if idx%2 == 0 {
				user = "user1"
			}
			m.RecordUsage(user, "plugin1", 1)
			done <- struct{}{}
		}(i)
	}

	// Wait for all goroutines
	for i := 0; i < workers; i++ {
		<-done
	}

	// Verify totals
	globalGPUs, _ := m.GetGlobalUsage()
	assert.Equal(t, 10, globalGPUs)

	usage1, _ := m.GetUsage("user1")
	assert.Equal(t, 5, usage1["plugin1"].GPUs)
	assert.Equal(t, 5, usage1["plugin1"].Services)

	usage2, _ := m.GetUsage("user2")
	assert.Equal(t, 5, usage2["plugin1"].GPUs)
	assert.Equal(t, 5, usage2["plugin1"].Services)
}