-- fetch_ml/internal/storage/schema_sqlite.sql
-- (file-viewer metadata: 133 lines, 4.2 KiB, SQL)

-- SQLite schema for Fetch ML job persistence
-- Complements Redis for task queuing
-- jobs: one row per job, keyed by a caller-supplied text id.
-- Timestamps defaulting to CURRENT_TIMESTAMP are stored by SQLite as
-- UTC text in the form YYYY-MM-DD HH:MM:SS.
CREATE TABLE IF NOT EXISTS jobs (
    -- NOT NULL is explicit because SQLite accepts NULL in a non-INTEGER
    -- PRIMARY KEY column unless the column is also declared NOT NULL.
    id TEXT NOT NULL PRIMARY KEY,
    job_name TEXT NOT NULL,
    args TEXT,
    status TEXT NOT NULL DEFAULT 'pending',
    priority INTEGER DEFAULT 0,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    started_at DATETIME,
    ended_at DATETIME,
    -- Not declared as a foreign key, so nothing ties it to workers(id).
    worker_id TEXT,
    error TEXT,
    datasets TEXT, -- JSON array
    metadata TEXT, -- JSON object
    -- Refreshed on every row update by the update_jobs_timestamp trigger.
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- job_metrics: metric samples per job, keyed by (job_id, metric_name, timestamp).
-- The key columns are declared NOT NULL because SQLite would otherwise accept
-- NULL in them, and NULLs never collide in the key index, which would allow
-- unlimited keyless duplicate rows.
-- NOTE(review): CURRENT_TIMESTAMP has one-second resolution, so two rows for
-- the same job and metric that both rely on the default within one second
-- will violate the primary key. Presumably writers pass their own timestamp,
-- confirm against the caller.
CREATE TABLE IF NOT EXISTS job_metrics (
    job_id TEXT NOT NULL,
    metric_name TEXT NOT NULL,
    metric_value TEXT,
    timestamp DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (job_id, metric_name, timestamp),
    -- Deleting a job removes its metrics. SQLite only enforces this on
    -- connections where foreign key support is enabled.
    FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE
);
-- workers: one row per worker, keyed by a caller-supplied text id.
CREATE TABLE IF NOT EXISTS workers (
    -- NOT NULL is explicit because SQLite accepts NULL in a non-INTEGER
    -- PRIMARY KEY column unless the column is also declared NOT NULL.
    id TEXT NOT NULL PRIMARY KEY,
    hostname TEXT,
    last_heartbeat DATETIME DEFAULT CURRENT_TIMESTAMP,
    status TEXT DEFAULT 'active',
    -- NOTE(review): presumably the running-job count and the capacity
    -- limit for this worker. No constraint here relates the two.
    current_jobs INTEGER DEFAULT 0,
    max_jobs INTEGER DEFAULT 1,
    metadata TEXT -- JSON object
);
-- system_metrics: metric samples not tied to a job, keyed by
-- (metric_name, timestamp).
-- Both key columns are declared NOT NULL because SQLite would otherwise
-- accept NULL in them and treat each NULL as distinct in the key index.
-- NOTE(review): the CURRENT_TIMESTAMP default has one-second resolution, so
-- two default-timestamped samples of one metric in the same second collide.
CREATE TABLE IF NOT EXISTS system_metrics (
    metric_name TEXT NOT NULL,
    metric_value TEXT,
    timestamp DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (metric_name, timestamp)
);
-- experiments: one row per experiment. Parent of the experiment_environments,
-- experiment_git_info and experiment_seeds tables, which reference id.
CREATE TABLE IF NOT EXISTS experiments (
    -- NOT NULL is explicit because SQLite accepts NULL in a non-INTEGER
    -- PRIMARY KEY column unless the column is also declared NOT NULL.
    id TEXT NOT NULL PRIMARY KEY,
    name TEXT NOT NULL,
    description TEXT,
    -- Nullable here, unlike jobs.status which is NOT NULL.
    status TEXT DEFAULT 'pending',
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Refreshed on every row update by the update_experiments_timestamp trigger.
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Neither column is declared as a foreign key in this schema.
    user_id TEXT,
    workspace_id TEXT
);
-- experiment_environments: at most one environment record per experiment,
-- because experiment_id is both the primary key and the foreign key.
CREATE TABLE IF NOT EXISTS experiment_environments (
    -- NOT NULL is explicit because SQLite accepts NULL in a non-INTEGER
    -- PRIMARY KEY column, and a NULL foreign key value is never checked
    -- against the parent table.
    experiment_id TEXT NOT NULL PRIMARY KEY,
    python_version TEXT,
    cuda_version TEXT,
    system_os TEXT,
    system_arch TEXT,
    hostname TEXT,
    requirements_hash TEXT,
    conda_env_hash TEXT,
    -- NOTE(review): serialization format is not specified in this schema.
    dependencies TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Deleting an experiment removes this row. SQLite only enforces this on
    -- connections where foreign key support is enabled.
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);
-- experiment_git_info: at most one git snapshot per experiment, because
-- experiment_id is both the primary key and the foreign key.
CREATE TABLE IF NOT EXISTS experiment_git_info (
    -- NOT NULL is explicit because SQLite accepts NULL in a non-INTEGER
    -- PRIMARY KEY column, and a NULL foreign key value is never checked
    -- against the parent table.
    experiment_id TEXT NOT NULL PRIMARY KEY,
    commit_sha TEXT,
    branch TEXT,
    remote_url TEXT,
    -- Integer flag defaulting to 0.
    -- NOTE(review): presumably 1 means uncommitted changes were present.
    is_dirty INTEGER DEFAULT 0,
    diff_patch TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Deleting an experiment removes this row. SQLite only enforces this on
    -- connections where foreign key support is enabled.
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);
-- experiment_seeds: at most one set of RNG seeds per experiment, because
-- experiment_id is both the primary key and the foreign key.
CREATE TABLE IF NOT EXISTS experiment_seeds (
    -- NOT NULL is explicit because SQLite accepts NULL in a non-INTEGER
    -- PRIMARY KEY column, and a NULL foreign key value is never checked
    -- against the parent table.
    experiment_id TEXT NOT NULL PRIMARY KEY,
    numpy_seed INTEGER,
    torch_seed INTEGER,
    tensorflow_seed INTEGER,
    -- NOTE(review): presumably the seed for the Python random module, confirm.
    random_seed INTEGER,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Deleting an experiment removes this row. SQLite only enforces this on
    -- connections where foreign key support is enabled.
    FOREIGN KEY (experiment_id) REFERENCES experiments(id) ON DELETE CASCADE
);
-- datasets: registry mapping a unique dataset name to its URL.
CREATE TABLE IF NOT EXISTS datasets (
    -- NOT NULL is explicit because SQLite accepts NULL in a non-INTEGER
    -- PRIMARY KEY column unless the column is also declared NOT NULL.
    name TEXT NOT NULL PRIMARY KEY,
    url TEXT NOT NULL,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    -- Refreshed on every row update by the update_datasets_timestamp trigger.
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Indexes for performance
-- jobs: lookups by status, creation time and assigned worker.
CREATE INDEX IF NOT EXISTS idx_jobs_status
    ON jobs (status);
CREATE INDEX IF NOT EXISTS idx_jobs_created_at
    ON jobs (created_at);
CREATE INDEX IF NOT EXISTS idx_jobs_worker_id
    ON jobs (worker_id);

-- job_metrics: lookups by job and by sample time.
-- NOTE(review): job_id is already the leading column of the job_metrics
-- primary key, so idx_job_metrics_job_id looks redundant. Kept because the
-- index name may be referenced elsewhere.
CREATE INDEX IF NOT EXISTS idx_job_metrics_job_id
    ON job_metrics (job_id);
CREATE INDEX IF NOT EXISTS idx_job_metrics_timestamp
    ON job_metrics (timestamp);

-- workers: lookups by most recent heartbeat.
CREATE INDEX IF NOT EXISTS idx_workers_heartbeat
    ON workers (last_heartbeat);

-- system_metrics: lookups by sample time.
CREATE INDEX IF NOT EXISTS idx_system_metrics_timestamp
    ON system_metrics (timestamp);

-- experiments: lookups by creation time, status and owning user.
CREATE INDEX IF NOT EXISTS idx_experiments_created_at
    ON experiments (created_at);
CREATE INDEX IF NOT EXISTS idx_experiments_status
    ON experiments (status);
CREATE INDEX IF NOT EXISTS idx_experiments_user_id
    ON experiments (user_id);

-- datasets
-- NOTE(review): name is the datasets primary key and already has an implicit
-- unique index, so idx_datasets_name looks redundant. Kept because the index
-- name may be referenced elsewhere.
CREATE INDEX IF NOT EXISTS idx_datasets_name
    ON datasets (name);
-- Triggers to update timestamps
-- Stamps jobs.updated_at with the current time after any update to a jobs row.
-- The WHEN guard skips the trigger once updated_at already equals the current
-- timestamp. The trigger body updates jobs itself, so without the guard a
-- connection running with PRAGMA recursive_triggers = ON would re-fire the
-- trigger on its own UPDATE until the trigger depth limit aborts the
-- statement. With recursive triggers off the final row state is unchanged.
CREATE TRIGGER IF NOT EXISTS update_jobs_timestamp
AFTER UPDATE ON jobs
FOR EACH ROW
WHEN NEW.updated_at IS NOT CURRENT_TIMESTAMP
BEGIN
    UPDATE jobs
    SET updated_at = CURRENT_TIMESTAMP
    WHERE id = NEW.id;
END;
-- Stamps experiments.updated_at with the current time after any update to an
-- experiments row.
-- The WHEN guard skips the trigger once updated_at already equals the current
-- timestamp. The trigger body updates experiments itself, so without the guard
-- a connection running with PRAGMA recursive_triggers = ON would re-fire the
-- trigger on its own UPDATE until the trigger depth limit aborts the
-- statement. With recursive triggers off the final row state is unchanged.
CREATE TRIGGER IF NOT EXISTS update_experiments_timestamp
AFTER UPDATE ON experiments
FOR EACH ROW
WHEN NEW.updated_at IS NOT CURRENT_TIMESTAMP
BEGIN
    UPDATE experiments
    SET updated_at = CURRENT_TIMESTAMP
    WHERE id = NEW.id;
END;
-- Stamps datasets.updated_at with the current time after any update to a
-- datasets row. The row is matched on NEW.name, so a rename is still stamped.
-- The WHEN guard skips the trigger once updated_at already equals the current
-- timestamp. The trigger body updates datasets itself, so without the guard a
-- connection running with PRAGMA recursive_triggers = ON would re-fire the
-- trigger on its own UPDATE until the trigger depth limit aborts the
-- statement. With recursive triggers off the final row state is unchanged.
CREATE TRIGGER IF NOT EXISTS update_datasets_timestamp
AFTER UPDATE ON datasets
FOR EACH ROW
WHEN NEW.updated_at IS NOT CURRENT_TIMESTAMP
BEGIN
    UPDATE datasets
    SET updated_at = CURRENT_TIMESTAMP
    WHERE name = NEW.name;
END;