-- SQLite schema for Fetch ML job persistence
-- Complements Redis for task queuing
--
-- The script is written to be re-runnable: every statement uses IF NOT EXISTS
-- or INSERT OR IGNORE, so executing it against an already-initialised database
-- is a no-op.
--
-- NOTE: SQLite only enforces FOREIGN KEY clauses (including ON DELETE CASCADE)
-- on connections that have executed `PRAGMA foreign_keys = ON`. This script
-- does not set the pragma itself.

-- Jobs (called "tasks" by the sharing tables further down).
-- experiment_id references experiments, which is created later in this file;
-- SQLite accepts a REFERENCES clause whose parent table does not exist yet.
CREATE TABLE IF NOT EXISTS jobs (
    id TEXT PRIMARY KEY,
    job_name TEXT NOT NULL,
    args TEXT,
    status TEXT NOT NULL DEFAULT 'pending',
    priority INTEGER DEFAULT 0,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    started_at DATETIME,
    ended_at DATETIME,
    worker_id TEXT,
    user_id TEXT,
    error TEXT,
    datasets TEXT, -- JSON array
    metadata TEXT, -- JSON object
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    visibility TEXT NOT NULL DEFAULT 'lab',
    experiment_id TEXT REFERENCES experiments(id)
);

-- Per-job metric samples, removed together with their job.
-- NOTE(review): timestamp is part of the primary key and defaults to
-- CURRENT_TIMESTAMP; two samples of the same metric written with the same
-- timestamp value will conflict -- confirm writers supply distinct timestamps.
CREATE TABLE IF NOT EXISTS job_metrics (
    job_id TEXT,
    metric_name TEXT,
    metric_value TEXT,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (job_id, metric_name, timestamp),
    FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
);

-- Worker registry with heartbeat and capacity counters.
CREATE TABLE IF NOT EXISTS workers (
    id TEXT PRIMARY KEY,
    hostname TEXT,
    last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
    status TEXT DEFAULT 'active',
    current_jobs INTEGER DEFAULT 0,
    max_jobs INTEGER DEFAULT 1,
    metadata TEXT -- JSON object
);

-- System-wide metric samples keyed by (metric_name, timestamp).
CREATE TABLE IF NOT EXISTS system_metrics (
    metric_name TEXT,
    metric_value TEXT,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (metric_name, timestamp)
);

-- Experiments.
-- group_id links an experiment to the sharing system (share a whole
-- experiment, not individual tasks). It is declared here rather than added by
-- a later `ALTER TABLE ... ADD COLUMN`, because SQLite has no
-- ADD COLUMN IF NOT EXISTS and the ALTER fails with a duplicate-column error
-- when the script is run a second time.
-- A database whose experiments table was created before group_id existed
-- needs this one-off migration instead:
--     ALTER TABLE experiments ADD COLUMN group_id TEXT REFERENCES groups(id);
CREATE TABLE IF NOT EXISTS experiments (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    description TEXT,
    status TEXT DEFAULT 'pending',
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    user_id TEXT,
    workspace_id TEXT,
    group_id TEXT REFERENCES groups(id)
);

-- One row per experiment: captured runtime environment.
CREATE TABLE IF NOT EXISTS experiment_environments (
    experiment_id TEXT PRIMARY KEY,
    python_version TEXT,
    cuda_version TEXT,
    system_os TEXT,
    system_arch TEXT,
    hostname TEXT,
    requirements_hash TEXT,
    conda_env_hash TEXT,
    dependencies TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);

-- One row per experiment: captured git state.
CREATE TABLE IF NOT EXISTS experiment_git_info (
    experiment_id TEXT PRIMARY KEY,
    commit_sha TEXT,
    branch TEXT,
    remote_url TEXT,
    is_dirty INTEGER DEFAULT 0,
    diff_patch TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);

-- One row per experiment: RNG seeds per library.
CREATE TABLE IF NOT EXISTS experiment_seeds (
    experiment_id TEXT PRIMARY KEY,
    numpy_seed INTEGER,
    torch_seed INTEGER,
    tensorflow_seed INTEGER,
    random_seed INTEGER,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);

-- Dataset registry keyed by name.
CREATE TABLE IF NOT EXISTS datasets (
    name TEXT PRIMARY KEY,
    url TEXT NOT NULL,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- Indexes for performance
CREATE INDEX IF NOT EXISTS idx_jobs_status ON jobs(status);
CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at);
CREATE INDEX IF NOT EXISTS idx_jobs_worker_id ON jobs(worker_id);
CREATE INDEX IF NOT EXISTS idx_job_metrics_job_id ON job_metrics(job_id);
CREATE INDEX IF NOT EXISTS idx_job_metrics_timestamp ON job_metrics(timestamp);
CREATE INDEX IF NOT EXISTS idx_workers_heartbeat ON workers(last_heartbeat);
CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON system_metrics(timestamp);
CREATE INDEX IF NOT EXISTS idx_experiments_created_at ON experiments(created_at);
CREATE INDEX IF NOT EXISTS idx_experiments_status ON experiments(status);
CREATE INDEX IF NOT EXISTS idx_experiments_user_id ON experiments(user_id);
-- NOTE(review): datasets.name is the PRIMARY KEY, which SQLite already backs
-- with an automatic index, so this index looks redundant. It is kept so that
-- fresh and existing databases end up with the same set of objects.
CREATE INDEX IF NOT EXISTS idx_datasets_name ON datasets(name);

-- Triggers to update timestamps
-- Each trigger issues an UPDATE against its own table. That does not loop as
-- long as recursive triggers are disabled, which is SQLite's default
-- (see PRAGMA recursive_triggers).
CREATE TRIGGER IF NOT EXISTS update_jobs_timestamp
AFTER UPDATE ON jobs
FOR EACH ROW
BEGIN
    UPDATE jobs SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;

CREATE TRIGGER IF NOT EXISTS update_experiments_timestamp
AFTER UPDATE ON experiments
FOR EACH ROW
BEGIN
    UPDATE experiments SET updated_at = CURRENT_TIMESTAMP WHERE id = NEW.id;
END;

CREATE TRIGGER IF NOT EXISTS update_datasets_timestamp
AFTER UPDATE ON datasets
FOR EACH ROW
BEGIN
    UPDATE datasets SET updated_at = CURRENT_TIMESTAMP WHERE name = NEW.name;
END;

-- WebSocket metrics table for tracking real-time metrics
CREATE TABLE IF NOT EXISTS websocket_metrics (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    metric_name TEXT NOT NULL,
    metric_value REAL NOT NULL,
    user TEXT,
    recorded_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE INDEX IF NOT EXISTS idx_websocket_metrics_name_time ON websocket_metrics(metric_name, recorded_at);

-- Groups and membership for lab-based task sharing
CREATE TABLE IF NOT EXISTS groups (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL UNIQUE,
    description TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    created_by TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS group_members (
    group_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    role TEXT DEFAULT 'member', -- 'admin', 'member', 'viewer'
    joined_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (group_id, user_id),
    FOREIGN KEY (group_id) REFERENCES groups(id) ON DELETE CASCADE
);

-- System group for institution visibility (all authenticated users)
-- INSERT OR IGNORE keeps the seed row idempotent across re-runs.
INSERT OR IGNORE INTO groups (id, name, description, created_by)
VALUES ('all-users', 'all-users', 'System group: all authenticated users', 'system');

-- Invite-and-accept flow: group admins invite; users accept or decline
CREATE TABLE IF NOT EXISTS group_invitations (
    id TEXT PRIMARY KEY,
    group_id TEXT NOT NULL,
    invited_user_id TEXT NOT NULL,
    invited_by TEXT NOT NULL,
    status TEXT DEFAULT 'pending', -- 'pending', 'accepted', 'declined'
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    expires_at DATETIME, -- NULL = 7d default enforced in app layer
    FOREIGN KEY (group_id) REFERENCES groups(id) ON DELETE CASCADE
);

-- Link tasks to experiments
CREATE TABLE IF NOT EXISTS experiment_tasks (
    experiment_id TEXT NOT NULL,
    task_id TEXT NOT NULL,
    added_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (experiment_id, task_id),
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE,
    FOREIGN KEY (task_id) REFERENCES jobs(id) ON DELETE CASCADE
);

-- Per-user explicit shares with optional expiry
CREATE TABLE IF NOT EXISTS task_shares (
    task_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    granted_by TEXT NOT NULL,
    granted_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    expires_at DATETIME, -- NULL = no expiry; checked at access time
    PRIMARY KEY (task_id, user_id),
    FOREIGN KEY (task_id) REFERENCES jobs(id) ON DELETE CASCADE
);

-- Group-level task association
-- Records which group a task is associated with at submit time.
-- Actual membership is always resolved live from group_members.
CREATE TABLE IF NOT EXISTS task_group_access (
    task_id TEXT NOT NULL,
    group_id TEXT NOT NULL,
    PRIMARY KEY (task_id, group_id),
    FOREIGN KEY (task_id) REFERENCES jobs(id) ON DELETE CASCADE,
    FOREIGN KEY (group_id) REFERENCES groups(id) ON DELETE CASCADE
);

-- Signed share tokens for unauthenticated open access (paper reproducibility links)
CREATE TABLE IF NOT EXISTS share_tokens (
    token TEXT PRIMARY KEY, -- cryptographically random (32 bytes, base64url)
    task_id TEXT, -- NULL if experiment-level
    experiment_id TEXT, -- NULL if task-level
    created_by TEXT NOT NULL,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    expires_at DATETIME, -- NULL = never expires
    access_count INTEGER DEFAULT 0,
    max_accesses INTEGER, -- NULL = unlimited
    FOREIGN KEY (task_id) REFERENCES jobs(id) ON DELETE CASCADE,
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);

-- Audit log for task access
CREATE TABLE IF NOT EXISTS task_access_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    task_id TEXT NOT NULL,
    user_id TEXT, -- NULL for token-based access
    token TEXT, -- NULL for session-based access
    action TEXT NOT NULL, -- 'view', 'clone', 'execute', 'modify'
    accessed_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    ip_address TEXT,
    FOREIGN KEY (task_id) REFERENCES jobs(id) ON DELETE CASCADE
);

-- Indexes for task sharing performance
CREATE INDEX IF NOT EXISTS idx_jobs_visibility ON jobs(visibility);
CREATE INDEX IF NOT EXISTS idx_jobs_user_id ON jobs(user_id);
CREATE INDEX IF NOT EXISTS idx_jobs_visibility_owner ON jobs(visibility, user_id);
CREATE INDEX IF NOT EXISTS idx_jobs_experiment ON jobs(experiment_id);
CREATE INDEX IF NOT EXISTS idx_task_shares_user ON task_shares(user_id);
CREATE INDEX IF NOT EXISTS idx_task_shares_expires ON task_shares(expires_at);
CREATE INDEX IF NOT EXISTS idx_tga_group ON task_group_access(group_id);
CREATE INDEX IF NOT EXISTS idx_share_tokens_task ON share_tokens(task_id);
CREATE INDEX IF NOT EXISTS idx_task_access_task ON task_access_log(task_id);
CREATE INDEX IF NOT EXISTS idx_task_access_user ON task_access_log(user_id);
-- Partial index: only rows that actually carry a token are indexed.
CREATE INDEX IF NOT EXISTS idx_task_access_token ON task_access_log(token) WHERE token IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_invitations_user ON group_invitations(invited_user_id);

-- Indexes on foreign-key child columns that no other index leads with.
-- Without them, an ON DELETE CASCADE from the parent row has to scan the whole
-- child table to find the referencing rows.
CREATE INDEX IF NOT EXISTS idx_share_tokens_experiment ON share_tokens(experiment_id);
CREATE INDEX IF NOT EXISTS idx_experiment_tasks_task ON experiment_tasks(task_id);
CREATE INDEX IF NOT EXISTS idx_invitations_group ON group_invitations(group_id);