-- Add comprehensive database storage layer for new features:
--   - db_groups.go: Lab group management with members, roles (admin/member/viewer),
--     and group-based task visibility queries
--   - db_tasks.go: Task visibility system (private/lab/institution/open), task sharing
--     with expiry, public clone tokens, and optimized ListTasksForUser() for access control
--   - db_tokens.go: Secure token management for public task access and cloning, with
--     SHA-256 hashed token storage and automatic cleanup
--   - db_audit.go: Audit log persistence with checkpoint chains, tamper detection, and
--     log rotation support
--   - schema_sqlite.sql: Updated schema with:
--       - groups, group_members tables
--       - tasks.visibility enum, task_shares with expiry
--       - access_tokens table with hashed tokens
--       - audit_logs, audit_checkpoints tables
--       - indexes for all foreign keys and query patterns
--   - db_experiments.go: Add CascadeVisibilityToTasks() for propagating visibility
--     changes from experiments to associated tasks
--
-- 257 lines, 9.6 KiB, SQL
-- SQLite schema for Fetch ML job persistence
-- Complements Redis for task queuing

-- jobs: one row per submitted job.
-- updated_at is refreshed by the update_jobs_timestamp trigger defined in this file.
CREATE TABLE IF NOT EXISTS jobs (
    id            TEXT     PRIMARY KEY,
    job_name      TEXT     NOT NULL,
    args          TEXT,
    status        TEXT     NOT NULL DEFAULT 'pending',
    priority      INTEGER  DEFAULT 0,
    created_at    DATETIME DEFAULT CURRENT_TIMESTAMP,
    started_at    DATETIME,
    ended_at      DATETIME,
    worker_id     TEXT,
    user_id       TEXT,
    error         TEXT,
    datasets      TEXT,                               -- JSON array
    metadata      TEXT,                               -- JSON object
    updated_at    DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- presumably one of 'private', 'lab', 'institution', 'open';
    -- not enforced by a CHECK constraint here
    visibility    TEXT     NOT NULL DEFAULT 'lab',
    experiment_id TEXT,
    -- experiments is created further down; SQLite resolves the parent table
    -- when rows are written, not when this statement runs
    FOREIGN KEY (experiment_id) REFERENCES experiments(id)
);

-- job_metrics: metric samples keyed by (job, metric name, sample time).
-- NOTE(review): CURRENT_TIMESTAMP has one-second resolution, so two samples of the
-- same metric for the same job within one second collide on the primary key when
-- the default timestamp is used.
CREATE TABLE IF NOT EXISTS job_metrics (
    -- cascade takes effect only when foreign-key enforcement is enabled
    job_id       TEXT     REFERENCES jobs(id) ON DELETE CASCADE,
    metric_name  TEXT,
    metric_value TEXT,
    timestamp    DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (job_id, metric_name, timestamp)
);

-- workers: registry of worker processes with their last heartbeat and job capacity.
CREATE TABLE IF NOT EXISTS workers (
    id             TEXT     PRIMARY KEY,
    hostname       TEXT,
    last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
    status         TEXT     DEFAULT 'active',
    current_jobs   INTEGER  DEFAULT 0,
    max_jobs       INTEGER  DEFAULT 1,
    metadata       TEXT                               -- JSON object
);

-- system_metrics: system-wide metric samples keyed by (metric name, sample time).
CREATE TABLE IF NOT EXISTS system_metrics (
    metric_name  TEXT,
    metric_value TEXT,
    timestamp    DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (metric_name, timestamp)
);

-- experiments: one row per experiment.
-- updated_at is refreshed by the update_experiments_timestamp trigger defined in this file.
CREATE TABLE IF NOT EXISTS experiments (
    id           TEXT     PRIMARY KEY,
    name         TEXT     NOT NULL,
    description  TEXT,
    status       TEXT     DEFAULT 'pending',
    created_at   DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at   DATETIME DEFAULT CURRENT_TIMESTAMP,
    user_id      TEXT,
    workspace_id TEXT
);

-- experiment_environments: at most one environment record per experiment
-- (experiment_id is both the primary key and the link to experiments).
CREATE TABLE IF NOT EXISTS experiment_environments (
    -- cascade takes effect only when foreign-key enforcement is enabled
    experiment_id     TEXT     PRIMARY KEY REFERENCES experiments(id) ON DELETE CASCADE,
    python_version    TEXT,
    cuda_version      TEXT,
    system_os         TEXT,
    system_arch       TEXT,
    hostname          TEXT,
    requirements_hash TEXT,
    conda_env_hash    TEXT,
    dependencies      TEXT,
    created_at        DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- experiment_git_info: at most one source-control record per experiment.
CREATE TABLE IF NOT EXISTS experiment_git_info (
    -- cascade takes effect only when foreign-key enforcement is enabled
    experiment_id TEXT     PRIMARY KEY REFERENCES experiments(id) ON DELETE CASCADE,
    commit_sha    TEXT,
    branch        TEXT,
    remote_url    TEXT,
    is_dirty      INTEGER  DEFAULT 0,
    diff_patch    TEXT,
    created_at    DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- experiment_seeds: at most one set of RNG seeds per experiment.
CREATE TABLE IF NOT EXISTS experiment_seeds (
    -- cascade takes effect only when foreign-key enforcement is enabled
    experiment_id   TEXT     PRIMARY KEY REFERENCES experiments(id) ON DELETE CASCADE,
    numpy_seed      INTEGER,
    torch_seed      INTEGER,
    tensorflow_seed INTEGER,
    random_seed     INTEGER,
    created_at      DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- datasets: dataset name -> URL.
-- updated_at is refreshed by the update_datasets_timestamp trigger defined in this file.
CREATE TABLE IF NOT EXISTS datasets (
    name       TEXT     PRIMARY KEY,
    url        TEXT     NOT NULL,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status);
CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at);
CREATE INDEX IF NOT EXISTS idx_jobs_worker_id ON jobs(worker_id);
CREATE INDEX IF NOT EXISTS idx_job_metrics_timestamp ON job_metrics(timestamp);
CREATE INDEX IF NOT EXISTS idx_workers_heartbeat ON workers(last_heartbeat);
CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON system_metrics(timestamp);
CREATE INDEX IF NOT EXISTS idx_experiments_created_at ON experiments(created_at);
CREATE INDEX IF NOT EXISTS idx_experiments_status ON experiments(status);
CREATE INDEX IF NOT EXISTS idx_experiments_user_id ON experiments(user_id);

-- Redundant indexes: each duplicated an index SQLite already maintains for the
-- table's PRIMARY KEY, costing extra work on every write for no lookup benefit.
--   * idx_job_metrics_job_id: job_id is the leading column of
--     PRIMARY KEY (job_id, metric_name, timestamp) on job_metrics.
--   * idx_datasets_name: name is the PRIMARY KEY of datasets.
-- They are no longer created; DROP ... IF EXISTS removes them from databases that
-- were initialised with an earlier version of this schema and is a no-op otherwise.
DROP INDEX IF EXISTS idx_job_metrics_job_id;
DROP INDEX IF EXISTS idx_datasets_name;

-- Triggers to update timestamps

-- After any UPDATE of a jobs row, stamp that row's updated_at with the current time.
-- The body updates the same table; this assumes PRAGMA recursive_triggers is left
-- at its default (off), so the inner UPDATE does not fire the trigger again.
CREATE TRIGGER IF NOT EXISTS update_jobs_timestamp
    AFTER UPDATE ON jobs
    FOR EACH ROW
BEGIN
    UPDATE jobs
       SET updated_at = CURRENT_TIMESTAMP
     WHERE id = NEW.id;
END;

-- After any UPDATE of an experiments row, stamp that row's updated_at with the current time.
-- The body updates the same table; this assumes PRAGMA recursive_triggers is left
-- at its default (off), so the inner UPDATE does not fire the trigger again.
CREATE TRIGGER IF NOT EXISTS update_experiments_timestamp
    AFTER UPDATE ON experiments
    FOR EACH ROW
BEGIN
    UPDATE experiments
       SET updated_at = CURRENT_TIMESTAMP
     WHERE id = NEW.id;
END;

-- After any UPDATE of a datasets row, stamp that row's updated_at with the current time.
-- Rows are matched on name, the table's primary key. The body updates the same table;
-- this assumes PRAGMA recursive_triggers is left at its default (off), so the inner
-- UPDATE does not fire the trigger again.
CREATE TRIGGER IF NOT EXISTS update_datasets_timestamp
    AFTER UPDATE ON datasets
    FOR EACH ROW
BEGIN
    UPDATE datasets
       SET updated_at = CURRENT_TIMESTAMP
     WHERE name = NEW.name;
END;

-- WebSocket metrics table for tracking real-time metrics
CREATE TABLE IF NOT EXISTS websocket_metrics (
    id           INTEGER  PRIMARY KEY AUTOINCREMENT,
    metric_name  TEXT     NOT NULL,
    metric_value REAL     NOT NULL,
    user         TEXT,
    recorded_at  DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- Composite index on metric name and record time
CREATE INDEX IF NOT EXISTS idx_websocket_metrics_name_time
    ON websocket_metrics(metric_name, recorded_at);

-- Groups and membership for lab-based task sharing
-- groups: one row per group; names are unique across all groups.
CREATE TABLE IF NOT EXISTS groups (
    id          TEXT     PRIMARY KEY,
    name        TEXT     NOT NULL UNIQUE,
    description TEXT,
    created_at  DATETIME DEFAULT CURRENT_TIMESTAMP,
    created_by  TEXT     NOT NULL
);

-- group_members: membership of users in groups, one row per (group, user) pair.
CREATE TABLE IF NOT EXISTS group_members (
    -- cascade takes effect only when foreign-key enforcement is enabled
    group_id  TEXT     NOT NULL REFERENCES groups(id) ON DELETE CASCADE,
    user_id   TEXT     NOT NULL,
    role      TEXT     DEFAULT 'member',              -- 'admin', 'member', 'viewer'
    joined_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (group_id, user_id)
);

-- System group for institution visibility (all authenticated users).
-- OR IGNORE makes the seed a no-op when the row is already present.
INSERT OR IGNORE INTO groups
    (id, name, description, created_by)
VALUES
    ('all-users', 'all-users', 'System group: all authenticated users', 'system');

-- Invite-and-accept flow: group admins invite; users accept or decline
CREATE TABLE IF NOT EXISTS group_invitations (
    id              TEXT     PRIMARY KEY,
    -- cascade takes effect only when foreign-key enforcement is enabled
    group_id        TEXT     NOT NULL REFERENCES groups(id) ON DELETE CASCADE,
    invited_user_id TEXT     NOT NULL,
    invited_by      TEXT     NOT NULL,
    status          TEXT     DEFAULT 'pending',       -- 'pending', 'accepted', 'declined'
    created_at      DATETIME DEFAULT CURRENT_TIMESTAMP,
    expires_at      DATETIME                          -- NULL = 7d default enforced in app layer
);

-- Experiment/project grouping: share a whole experiment, not individual tasks.
-- Note: the experiments table already exists; group_id is added to link it with the sharing system.
-- Adds a nullable group_id column to experiments, referencing groups(id).
-- NOTE(review): SQLite has no "ADD COLUMN IF NOT EXISTS". Unlike the
-- CREATE ... IF NOT EXISTS statements in this file, this statement fails with
-- "duplicate column name" when applied to a database that already has
-- experiments.group_id. Presumably the caller guards against or tolerates
-- that error -- confirm.
ALTER TABLE experiments ADD COLUMN group_id TEXT REFERENCES groups(id);
-- Link tasks to experiments
-- One row per (experiment, task) pair; task_id refers to jobs.id.
CREATE TABLE IF NOT EXISTS experiment_tasks (
    -- cascades take effect only when foreign-key enforcement is enabled
    experiment_id TEXT     NOT NULL REFERENCES experiments(id) ON DELETE CASCADE,
    task_id       TEXT     NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
    added_at      DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (experiment_id, task_id)
);

-- Per-user explicit shares with optional expiry
-- One row per (task, user) pair; task_id refers to jobs.id.
CREATE TABLE IF NOT EXISTS task_shares (
    -- cascade takes effect only when foreign-key enforcement is enabled
    task_id    TEXT     NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
    user_id    TEXT     NOT NULL,
    granted_by TEXT     NOT NULL,
    granted_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    expires_at DATETIME,                              -- NULL = no expiry; checked at access time
    PRIMARY KEY (task_id, user_id)
);

-- Group-level task association
-- Records which group a task is associated with at submit time.
-- Actual membership is always resolved live from group_members.
CREATE TABLE IF NOT EXISTS task_group_access (
    -- cascades take effect only when foreign-key enforcement is enabled
    task_id  TEXT NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
    group_id TEXT NOT NULL REFERENCES groups(id) ON DELETE CASCADE,
    PRIMARY KEY (task_id, group_id)
);

-- Signed share tokens for unauthenticated open access (paper reproducibility links)
-- NOTE(review): nothing here requires that at least one of task_id / experiment_id
-- is set; presumably the application layer guarantees it -- confirm.
CREATE TABLE IF NOT EXISTS share_tokens (
    token         TEXT     PRIMARY KEY,               -- cryptographically random (32 bytes, base64url)
    -- cascades take effect only when foreign-key enforcement is enabled
    task_id       TEXT     REFERENCES jobs(id) ON DELETE CASCADE,         -- NULL if experiment-level
    experiment_id TEXT     REFERENCES experiments(id) ON DELETE CASCADE,  -- NULL if task-level
    created_by    TEXT     NOT NULL,
    created_at    DATETIME DEFAULT CURRENT_TIMESTAMP,
    expires_at    DATETIME,                           -- NULL = never expires
    access_count  INTEGER  DEFAULT 0,
    max_accesses  INTEGER                             -- NULL = unlimited
);

-- Audit log for task access
-- One row per recorded access; task_id refers to jobs.id.
CREATE TABLE IF NOT EXISTS task_access_log (
    id          INTEGER  PRIMARY KEY AUTOINCREMENT,
    -- cascade takes effect only when foreign-key enforcement is enabled
    task_id     TEXT     NOT NULL REFERENCES jobs(id) ON DELETE CASCADE,
    user_id     TEXT,                                 -- NULL for token-based access
    token       TEXT,                                 -- NULL for session-based access
    action      TEXT     NOT NULL,                    -- 'view', 'clone', 'execute', 'modify'
    accessed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    ip_address  TEXT
);

-- Indexes for task sharing performance
-- NOTE(review): idx_jobs_visibility is a leading-column prefix of
-- idx_jobs_visibility_owner and is probably redundant; kept as-is here.
CREATE INDEX IF NOT EXISTS idx_jobs_visibility ON jobs(visibility);
CREATE INDEX IF NOT EXISTS idx_jobs_user_id ON jobs(user_id);
CREATE INDEX IF NOT EXISTS idx_jobs_visibility_owner ON jobs(visibility, user_id);
CREATE INDEX IF NOT EXISTS idx_jobs_experiment ON jobs(experiment_id);
CREATE INDEX IF NOT EXISTS idx_task_shares_user ON task_shares(user_id);
CREATE INDEX IF NOT EXISTS idx_task_shares_expires ON task_shares(expires_at);
CREATE INDEX IF NOT EXISTS idx_tga_group ON task_group_access(group_id);
CREATE INDEX IF NOT EXISTS idx_share_tokens_task ON share_tokens(task_id);
CREATE INDEX IF NOT EXISTS idx_task_access_task ON task_access_log(task_id);
CREATE INDEX IF NOT EXISTS idx_task_access_user ON task_access_log(user_id);
-- Partial index: only rows that recorded a token are indexed.
CREATE INDEX IF NOT EXISTS idx_task_access_token ON task_access_log(token) WHERE token IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_invitations_user ON group_invitations(invited_user_id);

-- Foreign-key child columns that previously had no usable index. Without one, every
-- delete of (or key change to) a parent row makes SQLite scan the whole child table
-- to find referencing rows when foreign-key enforcement is on.
CREATE INDEX IF NOT EXISTS idx_share_tokens_experiment ON share_tokens(experiment_id);
-- experiment_tasks' primary key leads with experiment_id, so it cannot serve task_id lookups.
CREATE INDEX IF NOT EXISTS idx_experiment_tasks_task ON experiment_tasks(task_id);
CREATE INDEX IF NOT EXISTS idx_invitations_group ON group_invitations(group_id);
-- experiments.group_id is added by the ALTER TABLE statement earlier in this file.
CREATE INDEX IF NOT EXISTS idx_experiments_group ON experiments(group_id);

-- group_members' primary key leads with group_id; this index supports looking up
-- the groups a given user belongs to.
CREATE INDEX IF NOT EXISTS idx_group_members_user ON group_members(user_id);
