fetch_ml/internal/tracking/plugins/tensorboard_test.go
Jeremie Fraeys 7efefa1933
feat(native): implement Rust native layer as a test
- queue_index: mmap-based priority queue with safe storage wrapper
- dataset_hash: BLAKE3 parallel hashing with rayon
- common: FFI utilities with panic recovery
- Minimal deps: ~20 total (rayon, blake3, memmap2, walkdir, chrono)
- Drop crossbeam, prometheus - use stdlib + manual metrics
- Makefile: cargo build targets, help text updated
- Forgejo CI: clippy, tests, miri, cargo-deny
- C FFI compatible with existing Go bindings
2026-03-14 17:45:58 -04:00

386 lines
11 KiB
Go

package plugins_test
import (
"context"
"errors"
"testing"
"github.com/jfraeys/fetch_ml/internal/container"
"github.com/jfraeys/fetch_ml/internal/logging"
"github.com/jfraeys/fetch_ml/internal/tracking"
"github.com/jfraeys/fetch_ml/internal/tracking/plugins"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// mockPodmanForTensorBoard is the container-runtime test double used by the
// TensorBoard plugin tests (intended to satisfy container.PodmanInterface).
// Every hook is optional: when a hook is nil the corresponding method falls
// back to its built-in default behavior.
type mockPodmanForTensorBoard struct {
	// containers records the configs passed to the default StartContainer
	// path, keyed by the generated mock container ID.
	containers map[string]*container.ContainerConfig

	// startFunc, when set, replaces the default StartContainer behavior.
	startFunc func(context.Context, *container.ContainerConfig) (string, error)
	// stopFunc, when set, replaces the default StopContainer behavior.
	stopFunc func(context.Context, string) error
	// removeFunc, when set, replaces the default RemoveContainer behavior.
	removeFunc func(context.Context, string) error
}
// newMockPodmanForTensorBoard returns a mock with no hooks installed and an
// empty, ready-to-write container registry.
func newMockPodmanForTensorBoard() *mockPodmanForTensorBoard {
	mock := new(mockPodmanForTensorBoard)
	mock.containers = map[string]*container.ContainerConfig{}
	return mock
}
// StartContainer delegates to startFunc when one is installed. Otherwise it
// records cfg under the ID "mock-tb-<cfg.Name>" and returns that ID.
func (m *mockPodmanForTensorBoard) StartContainer(ctx context.Context, cfg *container.ContainerConfig) (string, error) {
	if hook := m.startFunc; hook != nil {
		return hook(ctx, cfg)
	}

	containerID := "mock-tb-" + cfg.Name
	m.containers[containerID] = cfg
	return containerID, nil
}
// StopContainer delegates to stopFunc when one is installed; with no hook it
// is a no-op that reports success.
func (m *mockPodmanForTensorBoard) StopContainer(ctx context.Context, containerID string) error {
	if m.stopFunc == nil {
		return nil
	}
	return m.stopFunc(ctx, containerID)
}
// RemoveContainer delegates to removeFunc when one is installed. Otherwise it
// drops containerID from the recorded containers (a no-op for unknown IDs)
// and reports success.
func (m *mockPodmanForTensorBoard) RemoveContainer(ctx context.Context, containerID string) error {
	if hook := m.removeFunc; hook != nil {
		return hook(ctx, containerID)
	}

	delete(m.containers, containerID)
	return nil
}
// TestNewTensorBoardPluginNilPodman tests creation with nil podman
func TestNewTensorBoardPluginNilPodman(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
opts := plugins.TensorBoardOptions{
LogBasePath: "/tmp/tensorboard",
}
_, err := plugins.NewTensorBoardPlugin(logger, nil, opts)
require.Error(t, err)
assert.Contains(t, err.Error(), "podman manager is required")
}
// TestNewTensorBoardPluginEmptyLogPath tests creation with empty log path
func TestNewTensorBoardPluginEmptyLogPath(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
mockPodman := newMockPodmanForTensorBoard()
opts := plugins.TensorBoardOptions{}
_, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
require.Error(t, err)
assert.Contains(t, err.Error(), "log base path is required")
}
// TestNewTensorBoardPluginDefaults tests default values
func TestNewTensorBoardPluginDefaults(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
mockPodman := newMockPodmanForTensorBoard()
opts := plugins.TensorBoardOptions{
LogBasePath: "/tmp/tensorboard",
}
plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
require.NoError(t, err)
require.NotNil(t, plugin)
}
// TestTensorBoardPluginName tests plugin name
func TestTensorBoardPluginName(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
mockPodman := newMockPodmanForTensorBoard()
opts := plugins.TensorBoardOptions{
LogBasePath: "/tmp/tensorboard",
}
plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
require.NoError(t, err)
assert.Equal(t, "tensorboard", plugin.Name())
}
// TestTensorBoardPluginProvisionSidecarDisabled tests disabled mode
func TestTensorBoardPluginProvisionSidecarDisabled(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
mockPodman := newMockPodmanForTensorBoard()
opts := plugins.TensorBoardOptions{
LogBasePath: "/tmp/tensorboard",
}
plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
require.NoError(t, err)
config := tracking.ToolConfig{
Enabled: false,
Mode: tracking.ModeDisabled,
}
env, err := plugin.ProvisionSidecar(context.Background(), "task-1", config)
require.NoError(t, err)
assert.Nil(t, env)
}
// TestTensorBoardPluginProvisionSidecarRemoteMode tests remote mode
func TestTensorBoardPluginProvisionSidecarRemoteMode(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
mockPodman := newMockPodmanForTensorBoard()
opts := plugins.TensorBoardOptions{
LogBasePath: "/tmp/tensorboard",
}
plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
require.NoError(t, err)
config := tracking.ToolConfig{
Enabled: true,
Mode: tracking.ModeRemote,
Settings: map[string]any{
"job_name": "test-job",
},
}
// Remote mode for TensorBoard returns nil, nil (user-managed)
env, err := plugin.ProvisionSidecar(context.Background(), "task-1", config)
require.NoError(t, err)
assert.Nil(t, env)
}
// TestTensorBoardPluginProvisionSidecarSidecarMode verifies that provisioning
// in sidecar mode succeeds against the mock runtime and returns an environment
// containing both TENSORBOARD_URL and TENSORBOARD_HOST_LOG_DIR.
func TestTensorBoardPluginProvisionSidecarSidecarMode(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()

	// Use a per-test directory rather than a fixed /tmp path. This test runs
	// in parallel with others that use the same job name, and sidecar
	// provisioning presumably derives (and may create) a host log directory
	// under LogBasePath — TODO confirm against the plugin implementation.
	// t.TempDir is unique per test and removed automatically on completion.
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
	}
	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	env, err := plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)
	require.NotNil(t, env)
	assert.Contains(t, env, "TENSORBOARD_URL")
	assert.Contains(t, env, "TENSORBOARD_HOST_LOG_DIR")
}
// TestTensorBoardPluginProvisionSidecarDefaultJobName verifies that when no
// "job_name" setting is supplied, the task ID shows up in the returned
// TENSORBOARD_HOST_LOG_DIR value.
func TestTensorBoardPluginProvisionSidecarDefaultJobName(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()

	// Use a per-test directory rather than a fixed /tmp path so parallel
	// tests cannot collide on (or leave residue in) a shared location.
	// Whether the plugin creates directories under LogBasePath is not visible
	// from this file — TODO confirm. The temp dir name is built from the test
	// name, so it cannot accidentally satisfy the "task-123" check below.
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
	}
	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	// No job_name provided, so the plugin should fall back to the task ID.
	config := tracking.ToolConfig{
		Enabled:  true,
		Mode:     tracking.ModeSidecar,
		Settings: map[string]any{},
	}

	env, err := plugin.ProvisionSidecar(context.Background(), "task-123", config)
	require.NoError(t, err)
	require.NotNil(t, env)

	// The host log dir should be derived from task-123.
	assert.Contains(t, env["TENSORBOARD_HOST_LOG_DIR"], "task-123")
}
// TestTensorBoardPluginProvisionSidecarStartFailure verifies that an error
// from the container runtime's StartContainer is surfaced by ProvisionSidecar
// with a message containing "failed to start".
func TestTensorBoardPluginProvisionSidecarStartFailure(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)

	// Force every container start to fail; the arguments are irrelevant.
	mockPodman := newMockPodmanForTensorBoard()
	mockPodman.startFunc = func(_ context.Context, _ *container.ContainerConfig) (string, error) {
		return "", errors.New("failed to start container")
	}

	// Use a per-test directory rather than a fixed /tmp path so parallel
	// tests sharing the "test-job" name cannot interfere with each other.
	// Whether the plugin touches the filesystem before starting the container
	// is not visible from this file — TODO confirm.
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
	}
	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}

	_, err = plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "failed to start")
}
// TestTensorBoardPluginTeardownNonexistent tests teardown for nonexistent task
func TestTensorBoardPluginTeardownNonexistent(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
mockPodman := newMockPodmanForTensorBoard()
opts := plugins.TensorBoardOptions{
LogBasePath: "/tmp/tensorboard",
}
plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
require.NoError(t, err)
err = plugin.Teardown(context.Background(), "nonexistent-task")
require.NoError(t, err)
}
// TestTensorBoardPluginTeardownWithSidecar verifies that a task whose sidecar
// was provisioned successfully can then be torn down without error.
func TestTensorBoardPluginTeardownWithSidecar(t *testing.T) {
	t.Parallel()

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()

	// Use a per-test directory rather than a fixed /tmp path so this test's
	// provision/teardown cycle cannot interfere with parallel tests that use
	// the same job name. Whether the plugin creates or removes anything under
	// LogBasePath is not visible from this file — TODO confirm.
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
	}
	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)

	ctx := context.Background()

	// Create a sidecar first so there is something to tear down.
	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}
	_, err = plugin.ProvisionSidecar(ctx, "task-1", config)
	require.NoError(t, err)

	// Now tear it down.
	err = plugin.Teardown(ctx, "task-1")
	require.NoError(t, err)
}
// TestTensorBoardPluginHealthCheck tests health check
func TestTensorBoardPluginHealthCheck(t *testing.T) {
t.Parallel()
logger := logging.NewLogger(0, false)
mockPodman := newMockPodmanForTensorBoard()
opts := plugins.TensorBoardOptions{
LogBasePath: "/tmp/tensorboard",
}
plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
require.NoError(t, err)
// Health check always returns true for now
healthy := plugin.HealthCheck(context.Background(), tracking.ToolConfig{})
assert.True(t, healthy)
}
// TestTensorBoardPluginCustomImage verifies that an image supplied through
// TensorBoardOptions.Image is the one passed to the container runtime when a
// sidecar is provisioned.
func TestTensorBoardPluginCustomImage(t *testing.T) {
	t.Parallel()

	// Single source of truth for the image under test, so the option and the
	// assertion below cannot drift apart.
	const customImage = "custom/tensorboard:latest"

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()

	// Use a per-test directory rather than a fixed /tmp path so parallel
	// tests sharing the "test-job" name cannot interfere with each other.
	// Whether the plugin writes under LogBasePath is not visible from this
	// file — TODO confirm.
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		Image:         customImage,
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
	}
	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)
	require.NotNil(t, plugin)

	// Provision a sidecar so the mock records the container config.
	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}
	_, err = plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)

	// Every recorded container must have been started with the custom image.
	require.NotEmpty(t, mockPodman.containers, "container should have been created")
	for _, cfg := range mockPodman.containers {
		assert.Equal(t, customImage, cfg.Image, "custom image should be used")
	}
}
// TestTensorBoardPluginDefaultImage verifies that when no image is configured
// the container runtime receives tensorflow/tensorflow:2.17.0.
func TestTensorBoardPluginDefaultImage(t *testing.T) {
	t.Parallel()

	// Image the plugin is expected to fall back to when none is configured.
	const defaultImage = "tensorflow/tensorflow:2.17.0"

	logger := logging.NewLogger(0, false)
	mockPodman := newMockPodmanForTensorBoard()

	// Use a per-test directory rather than a fixed /tmp path so parallel
	// tests sharing the "test-job" name cannot interfere with each other.
	// Whether the plugin writes under LogBasePath is not visible from this
	// file — TODO confirm.
	opts := plugins.TensorBoardOptions{
		LogBasePath:   t.TempDir(),
		PortAllocator: tracking.NewPortAllocator(5700, 5900),
		// Image intentionally left unset so the default applies.
	}
	plugin, err := plugins.NewTensorBoardPlugin(logger, mockPodman, opts)
	require.NoError(t, err)
	require.NotNil(t, plugin)

	// Provision a sidecar so the mock records the container config.
	config := tracking.ToolConfig{
		Enabled: true,
		Mode:    tracking.ModeSidecar,
		Settings: map[string]any{
			"job_name": "test-job",
		},
	}
	_, err = plugin.ProvisionSidecar(context.Background(), "task-1", config)
	require.NoError(t, err)

	// Every recorded container must have been started with the default image.
	require.NotEmpty(t, mockPodman.containers, "container should have been created")
	for _, cfg := range mockPodman.containers {
		assert.Equal(t, defaultImage, cfg.Image, "default image should be used")
	}
}