-- SQLite schema for Fetch ML job persistence.
-- Complements Redis for task queuing.
--
-- Every statement uses IF NOT EXISTS, so the script can be re-run against an
-- existing database without error; objects that already exist are left as-is.
--
-- NOTE: SQLite only enforces FOREIGN KEY clauses (including ON DELETE CASCADE)
-- on connections that have executed `PRAGMA foreign_keys = ON`. This script
-- does not set the pragma; the connecting code is expected to.
--
-- NOTE(review): SQLite permits NULL in PRIMARY KEY columns of ordinary
-- (rowid) tables unless they are also declared NOT NULL. None of the TEXT
-- keys below are declared NOT NULL -- confirm callers never insert NULL keys.

-- ---------------------------------------------------------------------------
-- Jobs and their metrics
-- ---------------------------------------------------------------------------

-- One row per job. `updated_at` is maintained by update_jobs_timestamp.
CREATE TABLE IF NOT EXISTS jobs (
    id          TEXT PRIMARY KEY,
    job_name    TEXT NOT NULL,
    args        TEXT,
    status      TEXT NOT NULL DEFAULT 'pending',
    priority    INTEGER DEFAULT 0,
    created_at  DATETIME DEFAULT CURRENT_TIMESTAMP,
    started_at  DATETIME,
    ended_at    DATETIME,
    worker_id   TEXT,
    error       TEXT,
    datasets    TEXT,  -- JSON array
    metadata    TEXT,  -- JSON object
    updated_at  DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- Named metric samples attached to a job; a sample is identified by
-- (job, metric name, timestamp). Rows are removed with their job when
-- foreign-key enforcement is enabled.
CREATE TABLE IF NOT EXISTS job_metrics (
    job_id        TEXT,
    metric_name   TEXT,
    metric_value  TEXT,
    timestamp     DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (job_id, metric_name, timestamp),
    FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
);

-- ---------------------------------------------------------------------------
-- Workers and system-wide metrics
-- ---------------------------------------------------------------------------

-- One row per worker, with its last heartbeat and job-slot counters.
CREATE TABLE IF NOT EXISTS workers (
    id              TEXT PRIMARY KEY,
    hostname        TEXT,
    last_heartbeat  DATETIME DEFAULT CURRENT_TIMESTAMP,
    status          TEXT DEFAULT 'active',
    current_jobs    INTEGER DEFAULT 0,
    max_jobs        INTEGER DEFAULT 1,
    metadata        TEXT  -- JSON object
);

-- Named metric samples not tied to a job, identified by (metric name, timestamp).
CREATE TABLE IF NOT EXISTS system_metrics (
    metric_name   TEXT,
    metric_value  TEXT,
    timestamp     DATETIME DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (metric_name, timestamp)
);

-- ---------------------------------------------------------------------------
-- Experiments and their 1:1 detail tables
-- ---------------------------------------------------------------------------

-- One row per experiment. `updated_at` is maintained by
-- update_experiments_timestamp.
CREATE TABLE IF NOT EXISTS experiments (
    id            TEXT PRIMARY KEY,
    name          TEXT NOT NULL,
    description   TEXT,
    status        TEXT DEFAULT 'pending',
    created_at    DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at    DATETIME DEFAULT CURRENT_TIMESTAMP,
    user_id       TEXT,
    workspace_id  TEXT
);

-- At most one environment record per experiment (experiment_id is the key).
CREATE TABLE IF NOT EXISTS experiment_environments (
    experiment_id      TEXT PRIMARY KEY,
    python_version     TEXT,
    cuda_version       TEXT,
    system_os          TEXT,
    system_arch        TEXT,
    hostname           TEXT,
    requirements_hash  TEXT,
    conda_env_hash     TEXT,
    dependencies       TEXT,
    created_at         DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);

-- At most one git record per experiment (experiment_id is the key).
CREATE TABLE IF NOT EXISTS experiment_git_info (
    experiment_id  TEXT PRIMARY KEY,
    commit_sha     TEXT,
    branch         TEXT,
    remote_url     TEXT,
    is_dirty       INTEGER DEFAULT 0,  -- presumably a 0/1 flag; verify against writer
    diff_patch     TEXT,
    created_at     DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);

-- At most one set of RNG seeds per experiment (experiment_id is the key).
CREATE TABLE IF NOT EXISTS experiment_seeds (
    experiment_id    TEXT PRIMARY KEY,
    numpy_seed       INTEGER,
    torch_seed       INTEGER,
    tensorflow_seed  INTEGER,
    random_seed      INTEGER,
    created_at       DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);

-- ---------------------------------------------------------------------------
-- Datasets
-- ---------------------------------------------------------------------------

-- Dataset registry keyed by name. `updated_at` is maintained by
-- update_datasets_timestamp.
CREATE TABLE IF NOT EXISTS datasets (
    name        TEXT PRIMARY KEY,
    url         TEXT NOT NULL,
    created_at  DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at  DATETIME DEFAULT CURRENT_TIMESTAMP
);

-- ---------------------------------------------------------------------------
-- Indexes for performance
-- ---------------------------------------------------------------------------

CREATE INDEX IF NOT EXISTS idx_jobs_status     ON jobs(status);
CREATE INDEX IF NOT EXISTS idx_jobs_created_at ON jobs(created_at);
CREATE INDEX IF NOT EXISTS idx_jobs_worker_id  ON jobs(worker_id);

-- NOTE(review): job_id is the leading column of the job_metrics primary key,
-- so idx_job_metrics_job_id looks redundant with the key's own index. Kept
-- because the name may be referenced elsewhere -- confirm before dropping.
CREATE INDEX IF NOT EXISTS idx_job_metrics_job_id    ON job_metrics(job_id);
CREATE INDEX IF NOT EXISTS idx_job_metrics_timestamp ON job_metrics(timestamp);

CREATE INDEX IF NOT EXISTS idx_workers_heartbeat ON workers(last_heartbeat);

CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp ON system_metrics(timestamp);

CREATE INDEX IF NOT EXISTS idx_experiments_created_at ON experiments(created_at);
CREATE INDEX IF NOT EXISTS idx_experiments_status     ON experiments(status);
CREATE INDEX IF NOT EXISTS idx_experiments_user_id    ON experiments(user_id);

-- NOTE(review): datasets.name is already the primary key, so this index looks
-- redundant. Kept because the name may be referenced elsewhere -- confirm
-- before dropping.
CREATE INDEX IF NOT EXISTS idx_datasets_name ON datasets(name);

-- ---------------------------------------------------------------------------
-- Triggers to update timestamps
-- ---------------------------------------------------------------------------
-- Each trigger stamps `updated_at` with CURRENT_TIMESTAMP after any UPDATE of
-- a row, overwriting whatever value the UPDATE itself supplied.
--
-- The WHEN guard skips the write when `updated_at` already equals the current
-- timestamp. The stored value is the same either way, but the guard makes the
-- trigger terminate after one nested UPDATE when `PRAGMA recursive_triggers`
-- is ON; without it the trigger would re-fire on its own UPDATE until the
-- recursion limit aborts the statement. `IS NOT` (rather than `<>`) is used so
-- a NULL `updated_at` still gets stamped.

CREATE TRIGGER IF NOT EXISTS update_jobs_timestamp
AFTER UPDATE ON jobs
FOR EACH ROW
WHEN NEW.updated_at IS NOT CURRENT_TIMESTAMP
BEGIN
    UPDATE jobs
    SET updated_at = CURRENT_TIMESTAMP
    WHERE id = NEW.id;
END;

CREATE TRIGGER IF NOT EXISTS update_experiments_timestamp
AFTER UPDATE ON experiments
FOR EACH ROW
WHEN NEW.updated_at IS NOT CURRENT_TIMESTAMP
BEGIN
    UPDATE experiments
    SET updated_at = CURRENT_TIMESTAMP
    WHERE id = NEW.id;
END;

-- Matches on NEW.name so the stamp follows the row even if the UPDATE
-- changed the primary key.
CREATE TRIGGER IF NOT EXISTS update_datasets_timestamp
AFTER UPDATE ON datasets
FOR EACH ROW
WHEN NEW.updated_at IS NOT CURRENT_TIMESTAMP
BEGIN
    UPDATE datasets
    SET updated_at = CURRENT_TIMESTAMP
    WHERE name = NEW.name;
END;