package plugins_test

import (
	"context"
	"errors"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/container"
	"github.com/jfraeys/fetch_ml/internal/logging"
	"github.com/jfraeys/fetch_ml/internal/tracking"
	"github.com/jfraeys/fetch_ml/internal/tracking/plugins"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// mockPodmanForTensorBoard is an in-memory stand-in for the podman manager
// that the TensorBoard plugin is constructed with. Each operation can be
// overridden through the corresponding *Func hook; when a hook is nil the
// mock falls back to simple bookkeeping in the containers map.
type mockPodmanForTensorBoard struct {
	startFunc  func(ctx context.Context, cfg *container.ContainerConfig) (string, error)
	stopFunc   func(ctx context.Context, containerID string) error
	removeFunc func(ctx context.Context, containerID string) error
	// containers records the config of every container started through the
	// default StartContainer path, keyed by the generated container ID.
	containers map[string]*container.ContainerConfig
}

// newMockPodmanForTensorBoard returns a mock with no hooks installed and an
// empty, ready-to-use containers map.
func newMockPodmanForTensorBoard() *mockPodmanForTensorBoard {
	return &mockPodmanForTensorBoard{
		containers: make(map[string]*container.ContainerConfig),
	}
}

// StartContainer delegates to startFunc when it is set. Otherwise it records
// cfg under the ID "mock-tb-<cfg.Name>" and returns that ID.
//
// A nil cfg on the default path is reported as an error rather than being
// dereferenced, so a misbehaving caller fails one test instead of panicking
// the whole test binary.
func (m *mockPodmanForTensorBoard) StartContainer(ctx context.Context, cfg *container.ContainerConfig) (string, error) {
	if m.startFunc != nil {
		return m.startFunc(ctx, cfg)
	}
	if cfg == nil {
		return "", errors.New("mock podman: nil container config")
	}
	id := "mock-tb-" + cfg.Name
	m.containers[id] = cfg
	return id, nil
}

// StopContainer delegates to stopFunc when it is set and otherwise succeeds
// without touching any state.
func (m *mockPodmanForTensorBoard) StopContainer(ctx context.Context, containerID string) error {
	if m.stopFunc != nil {
		return m.stopFunc(ctx, containerID)
	}
	return nil
}

// RemoveContainer delegates to removeFunc when it is set. Otherwise it drops
// containerID from the containers map (a no-op for unknown IDs) and succeeds.
func (m *mockPodmanForTensorBoard) RemoveContainer(ctx context.Context, containerID string) error {
	if m.removeFunc != nil {
		return m.removeFunc(ctx, containerID)
	}
	delete(m.containers, containerID)
	return nil
}

// TestNewTensorBoardPluginNilPodman verifies that construction fails with a
// descriptive error when no podman manager is supplied.
func TestNewTensorBoardPluginNilPodman(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	opts := plugins.TensorBoardOptions{
		// Per-test directory: these tests run in parallel and must not share
		// a fixed host path.
		LogBasePath: t.TempDir(),
	}

	_, err := plugins.NewTensorBoardPlugin(logger, nil, opts)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "podman manager is required")
}

// TestNewTensorBoardPluginEmptyLogPath verifies that construction fails with
// a descriptive error when LogBasePath is left empty.
func TestNewTensorBoardPluginEmptyLogPath(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{}

	_, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "log base path is required")
}

// TestNewTensorBoardPluginDefaults verifies that a plugin can be created when
// only the required LogBasePath option is provided.
func TestNewTensorBoardPluginDefaults(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath: t.TempDir(),
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)
	require.NotNil(t, plugin)
}

// TestTensorBoardPluginName verifies that the plugin reports the name
// "tensorboard".
func TestTensorBoardPluginName(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath: t.TempDir(),
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	assert.Equal(t, "tensorboard", plugin.Name())
}

// TestTensorBoardPluginProvisionSidecarDisabled verifies that provisioning
// with a disabled tool config succeeds and yields no environment.
func TestTensorBoardPluginProvisionSidecarDisabled(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath: t.TempDir(),
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	config := tracking.ToolConfig{
		Enabled: false,
		Mode:    tracking.ModeDisabled,
	}

	env, err := plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)
	assert.Nil(t, env)
}

// TestTensorBoardPluginProvisionSidecarRemoteMode verifies that remote mode
// provisions nothing: the call succeeds and returns a nil environment.
func TestTensorBoardPluginProvisionSidecarRemoteMode(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath: t.TempDir(),
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeRemote,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	// Remote mode for TensorBoard returns nil, nil (user-managed).
	env, err := plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)
	assert.Nil(t, env)
}

// TestTensorBoardPluginProvisionSidecarSidecarMode verifies that sidecar mode
// returns an environment exposing the TensorBoard URL and host log directory.
func TestTensorBoardPluginProvisionSidecarSidecarMode(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	allocator := tracking.NewPortAllocator(5700, 5900)
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: allocator,
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	env, err := plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)
	require.NotNil(t, env)
	assert.Contains(t, env, "TENSORBOARD_URL")
	assert.Contains(t, env, "TENSORBOARD_HOST_LOG_DIR")
}

// TestTensorBoardPluginProvisionSidecarDefaultJobName verifies that the task
// ID appears in the host log directory when no job_name setting is supplied.
func TestTensorBoardPluginProvisionSidecarDefaultJobName(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	allocator := tracking.NewPortAllocator(5700, 5900)
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: allocator,
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	// No job_name provided, should use taskID.
	config := tracking.ToolConfig{
		Enabled:  true,
		Mode:     tracking.ModeSidecar,
		Settings: map[string]any{},
	}

	env, err := plugin.ProvisionSidecar(context.Background(), "task-123", config)
	require.NoError(t, err)
	require.NotNil(t, env)

	// Should use task-123 as job name.
	assert.Contains(t, env["TENSORBOARD_HOST_LOG_DIR"], "task-123")
}

// TestTensorBoardPluginProvisionSidecarStartFailure verifies that an error
// from the container runtime is surfaced by ProvisionSidecar.
func TestTensorBoardPluginProvisionSidecarStartFailure(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	// Force every start attempt to fail regardless of the requested config.
	mockPodman.startFunc = func(_ context.Context, _ *container.ContainerConfig) (string, error) {
		return "", errors.New("failed to start container")
	}

	allocator := tracking.NewPortAllocator(5700, 5900)
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: allocator,
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	_, err = plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to start")
}

// TestTensorBoardPluginTeardownNonexistent verifies that tearing down a task
// that was never provisioned is not an error.
func TestTensorBoardPluginTeardownNonexistent(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath: t.TempDir(),
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	err = plugin.Teardown(context.Background(), "nonexistent-task")
	require.NoError(t, err)
}

// TestTensorBoardPluginTeardownWithSidecar verifies that a task provisioned in
// sidecar mode can subsequently be torn down without error.
func TestTensorBoardPluginTeardownWithSidecar(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	allocator := tracking.NewPortAllocator(5700, 5900)
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: allocator,
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	// Create a sidecar first.
	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	_, err = plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)

	// Now teardown.
	err = plugin.Teardown(context.Background(), "task-1")
	require.NoError(t, err)
}

// TestTensorBoardPluginHealthCheck verifies that the health check reports
// healthy for a zero-value tool config.
func TestTensorBoardPluginHealthCheck(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath: t.TempDir(),
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	// Health check always returns true for now.
	healthy := plugin.HealthCheck(context.Background(), tracking.ToolConfig{})
	assert.True(t, healthy)
}

// TestTensorBoardPluginCustomImage verifies that an explicitly configured
// image is the one passed to the container runtime.
func TestTensorBoardPluginCustomImage(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		Image:         "custom/tensorboard:latest",
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)
	require.NotNil(t, plugin)

	// Provision sidecar and verify custom image is used.
	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	_, err = plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)

	// Verify the custom image was used in container config. The NotEmpty
	// guard keeps the loop below from passing vacuously.
	require.NotEmpty(t, mockPodman.containers, "container should have been created")
	for _, cfg := range mockPodman.containers {
		assert.Equal(t, "custom/tensorboard:latest", cfg.Image, "custom image should be used")
	}
}

// TestTensorBoardPluginDefaultImage verifies that the plugin falls back to
// tensorflow/tensorflow:2.17.0 when no image is configured.
func TestTensorBoardPluginDefaultImage(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
		// Image not specified - should default to tensorflow/tensorflow:2.17.0
	}

	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)
	require.NotNil(t, plugin)

	// Provision sidecar and verify default image is used.
	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	_, err = plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)

	// Verify the default image was used in container config. The NotEmpty
	// guard keeps the loop below from passing vacuously.
	require.NotEmpty(t, mockPodman.containers, "container should have been created")
	for _, cfg := range mockPodman.containers {
		assert.Equal(t, "tensorflow/tensorflow:2.17.0", cfg.Image, "default image should be used")
	}
}