fetch_ml/internal/manifest/run_manifest.go

package manifest

import (
	"crypto/rand"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/jfraeys/fetch_ml/internal/crypto"
	"github.com/jfraeys/fetch_ml/internal/fileutil"
)

const (
	// runManifestFilename is the fixed (legacy) manifest filename used when no
	// nonce is available.
	runManifestFilename = "run_manifest.json"
	// manifestNonceLength is the number of random bytes in a manifest nonce.
	// Hex encoding doubles this, giving a 32-character nonce string.
	manifestNonceLength = 16 // 32 hex chars
)

// GenerateManifestNonce generates a cryptographically secure nonce for manifest filenames.
// This prevents information disclosure in multi-tenant environments where predictable
// filenames could be enumerated.
func GenerateManifestNonce() (string, error) {
	nonce := make([]byte, manifestNonceLength)
	if _, err := rand.Read(nonce); err != nil {
		return "", fmt.Errorf("failed to generate manifest nonce: %w", err)
	}
	return hex.EncodeToString(nonce), nil
}

// GenerateManifestFilename builds a manifest filename of the form
// run_manifest_<nonce>.json, where <nonce> comes from GenerateManifestNonce.
// Any error from nonce generation is returned unchanged together with an
// empty filename.
func GenerateManifestFilename() (string, error) {
	var filename string
	token, err := GenerateManifestNonce()
	if err != nil {
		return filename, err
	}
	filename = fmt.Sprintf("run_manifest_%s.json", token)
	return filename, nil
}

// ParseManifestFilename returns the nonce embedded in a filename of the form
// run_manifest_<nonce>.json. It returns the empty string when the filename
// does not have that shape; the nonce itself is not validated.
func ParseManifestFilename(filename string) string {
	const (
		prefix = "run_manifest_"
		suffix = ".json"
	)
	switch {
	case !strings.HasPrefix(filename, prefix):
		return ""
	case !strings.HasSuffix(filename, suffix):
		return ""
	}
	return strings.TrimSuffix(strings.TrimPrefix(filename, prefix), suffix)
}

// Annotation is a timestamped note attached to a run, optionally attributed
// to an author.
type Annotation struct {
	Timestamp time.Time `json:"timestamp"`
	Author    string    `json:"author,omitempty"`
	Note      string    `json:"note"`
}

// UnmarshalJSON decodes an Annotation, accepting the timestamp under either
// the "timestamp" key or the alternate "ts" key. When both keys are present,
// "timestamp" takes precedence.
//
// Every field of the receiver is overwritten: a key that is absent from data
// leaves the corresponding field at its zero value, so decoding into a reused
// Annotation never retains values from a previous decode. Errors from
// json.Unmarshal are returned unchanged and leave the receiver untouched.
func (a *Annotation) UnmarshalJSON(data []byte) error {
	// The timestamp keys are pointers so that an absent key can be told apart
	// from a present one.
	type annotationWire struct {
		Timestamp *time.Time `json:"timestamp,omitempty"`
		TS        *time.Time `json:"ts,omitempty"`
		Author    string     `json:"author,omitempty"`
		Note      string     `json:"note"`
	}
	var w annotationWire
	if err := json.Unmarshal(data, &w); err != nil {
		return err
	}

	switch {
	case w.Timestamp != nil:
		a.Timestamp = *w.Timestamp
	case w.TS != nil:
		a.Timestamp = *w.TS
	default:
		// Reset like Author and Note below, instead of keeping a stale value.
		a.Timestamp = time.Time{}
	}
	a.Author = w.Author
	a.Note = w.Note
	return nil
}

// Narrative holds the free-form, human-written description of a run: what was
// hypothesized, why it was run, what was expected, and how it relates to
// other runs. Every field is optional and is omitted from the JSON encoding
// when empty.
type Narrative struct {
	Hypothesis      string   `json:"hypothesis,omitempty"`
	Context         string   `json:"context,omitempty"`
	Intent          string   `json:"intent,omitempty"`
	ExpectedOutcome string   `json:"expected_outcome,omitempty"`
	ParentRun       string   `json:"parent_run,omitempty"`
	ExperimentGroup string   `json:"experiment_group,omitempty"`
	Tags            []string `json:"tags,omitempty"`
}

// NarrativePatch is a partial update to a Narrative. It mirrors Narrative
// field for field, but every field is a pointer: a nil field means "leave
// unchanged", while a non-nil field replaces the current value (see
// RunManifest.ApplyNarrativePatch).
type NarrativePatch struct {
	Hypothesis      *string   `json:"hypothesis,omitempty"`
	Context         *string   `json:"context,omitempty"`
	Intent          *string   `json:"intent,omitempty"`
	ExpectedOutcome *string   `json:"expected_outcome,omitempty"`
	ParentRun       *string   `json:"parent_run,omitempty"`
	ExperimentGroup *string   `json:"experiment_group,omitempty"`
	Tags            *[]string `json:"tags,omitempty"`
}

// Outcome represents the documented result of a run.
// Every field is optional and is omitted from the JSON encoding when empty.
type Outcome struct {
	Status        string   `json:"status,omitempty"`         // validated, invalidated, inconclusive, partial
	Summary       string   `json:"summary,omitempty"`        // Brief description
	KeyLearnings  []string `json:"key_learnings,omitempty"`  // 3-5 bullet points max
	FollowUpRuns  []string `json:"follow_up_runs,omitempty"` // References to related runs
	ArtifactsUsed []string `json:"artifacts_used,omitempty"` // e.g., ["model.pt", "metrics.json"]
}

// ArtifactFile describes a single artifact file: its path, its size in bytes,
// and its modification time.
// NOTE(review): whether Path is relative to the run directory is not visible
// here — confirm against the code that populates it.
type ArtifactFile struct {
	Path      string    `json:"path"`
	SizeBytes int64     `json:"size_bytes"`
	Modified  time.Time `json:"modified"`
}

// Artifacts summarizes the artifact files recorded for a run, together with
// any paths that were excluded from scanning.
// NOTE(review): DiscoveryTime is presumably when the scan ran and
// TotalSizeBytes the sum of Files[i].SizeBytes — confirm against the scanner.
type Artifacts struct {
	DiscoveryTime  time.Time      `json:"discovery_time"`
	Files          []ArtifactFile `json:"files,omitempty"`
	TotalSizeBytes int64          `json:"total_size_bytes,omitempty"`
	Exclusions     []Exclusion    `json:"exclusions,omitempty"` // R.5: Scan exclusions recorded
}

// Exclusion records why a path was excluded from artifact scanning.
// Both fields are always present in the JSON encoding.
type Exclusion struct {
	Path   string `json:"path"`
	Reason string `json:"reason"`
}

// ExecutionEnvironment captures the runtime environment for reproducibility.
// This enables reconstruction and comparison of runs.
//
// ManifestNonce has a second role beyond identification: when it is non-empty,
// RunManifest.WriteToDir stores the manifest as run_manifest_<nonce>.json
// instead of the fixed run_manifest.json.
type ExecutionEnvironment struct {
	ConfigHash         string            `json:"config_hash"`                    // R.2: Resolved config hash
	GPUCount           int               `json:"gpu_count"`                      // GPU count detected
	GPUDetectionMethod string            `json:"gpu_detection_method,omitempty"` // R.3: "nvml", "env_override", etc.
	GPUVendor          string            `json:"gpu_vendor,omitempty"`           // Configured GPU vendor
	MaxWorkers         int               `json:"max_workers"`                    // Active resource limits
	PodmanCPUs         string            `json:"podman_cpus,omitempty"`          // CPU limit
	SandboxNetworkMode string            `json:"sandbox_network_mode"`           // Sandbox settings
	SandboxSeccomp     string            `json:"sandbox_seccomp,omitempty"`      // Seccomp profile
	SandboxNoNewPrivs  bool              `json:"sandbox_no_new_privs"`           // Security flags
	ComplianceMode     string            `json:"compliance_mode,omitempty"`      // HIPAA mode
	ManifestNonce      string            `json:"manifest_nonce,omitempty"`       // Unique manifest identifier
	Metadata           map[string]string `json:"metadata,omitempty"`             // Additional env info
}

// RunManifest is a best-effort, self-contained provenance record for a run.
// WriteToDir stores it as <run_dir>/run_manifest.json, or as
// <run_dir>/run_manifest_<nonce>.json when Environment.ManifestNonce is set.
//
// NOTE(review): StartedAt and EndedAt are tagged `omitempty`, but
// encoding/json never treats a struct value such as time.Time as empty, so
// zero timestamps are still written to the JSON output. Confirm whether
// consumers rely on that before changing the tags.
type RunManifest struct {
	RunID     string    `json:"run_id"`
	TaskID    string    `json:"task_id"`
	JobName   string    `json:"job_name"`
	CreatedAt time.Time `json:"created_at"`
	StartedAt time.Time `json:"started_at,omitempty"`
	EndedAt   time.Time `json:"ended_at,omitempty"`

	Annotations []Annotation `json:"annotations,omitempty"`
	Narrative   *Narrative   `json:"narrative,omitempty"`
	Outcome     *Outcome     `json:"outcome,omitempty"`
	Artifacts   *Artifacts   `json:"artifacts,omitempty"`

	CommitID              string `json:"commit_id,omitempty"`
	ExperimentManifestSHA string `json:"experiment_manifest_sha,omitempty"`
	DepsManifestName      string `json:"deps_manifest_name,omitempty"`
	DepsManifestSHA       string `json:"deps_manifest_sha,omitempty"`
	TrainScriptPath       string `json:"train_script_path,omitempty"`

	WorkerVersion string `json:"worker_version,omitempty"`
	PodmanImage   string `json:"podman_image,omitempty"`
	ImageDigest   string `json:"image_digest,omitempty"`

	SnapshotID     string `json:"snapshot_id,omitempty"`
	SnapshotSHA256 string `json:"snapshot_sha256,omitempty"`

	Command  string `json:"command,omitempty"`
	Args     string `json:"args,omitempty"`
	ExitCode *int   `json:"exit_code,omitempty"`
	Error    string `json:"error,omitempty"`

	StagingDurationMS   int64 `json:"staging_duration_ms,omitempty"`
	ExecutionDurationMS int64 `json:"execution_duration_ms,omitempty"`
	FinalizeDurationMS  int64 `json:"finalize_duration_ms,omitempty"`
	TotalDurationMS     int64 `json:"total_duration_ms,omitempty"`

	GPUDevices []string          `json:"gpu_devices,omitempty"`
	WorkerHost string            `json:"worker_host,omitempty"`
	Metadata   map[string]string `json:"metadata,omitempty"`

	// Environment captures execution environment for reproducibility (R.1)
	Environment *ExecutionEnvironment `json:"environment,omitempty"`

	// Signature fields for tamper detection
	Signature   string `json:"signature,omitempty"`
	SignerKeyID string `json:"signer_key_id,omitempty"`
	SigAlg      string `json:"sig_alg,omitempty"`
}

// NewRunManifest returns a manifest carrying the given run, task and job
// identifiers and creation time. Metadata is initialized to an empty, non-nil
// map so callers can add keys immediately; every other field is left at its
// zero value.
func NewRunManifest(runID, taskID, jobName string, createdAt time.Time) *RunManifest {
	return &RunManifest{
		Metadata:  map[string]string{},
		CreatedAt: createdAt,
		JobName:   jobName,
		TaskID:    taskID,
		RunID:     runID,
	}
}

// ManifestPath returns the default manifest path (legacy fixed filename),
// i.e. dir joined with run_manifest.json.
//
// Deprecated: Use ManifestPathWithNonce for new code to support unique filenames.
func ManifestPath(dir string) string {
	return filepath.Join(dir, runManifestFilename)
}

// ManifestPathWithNonce returns the path of the manifest inside dir.
// With a non-empty nonce the filename is run_manifest_<nonce>.json; with an
// empty nonce it falls back to the default run_manifest.json.
// The nonce is inserted as-is; no validation is performed here.
func ManifestPathWithNonce(dir, nonce string) string {
	if nonce != "" {
		return filepath.Join(dir, fmt.Sprintf("run_manifest_%s.json", nonce))
	}
	return filepath.Join(dir, runManifestFilename)
}

// WriteToDir serializes the manifest as indented JSON and writes it into dir
// through fileutil.SecureFileWrite with mode 0640.
//
// The file is named run_manifest_<nonce>.json when Environment.ManifestNonce
// is set, and run_manifest.json otherwise.
//
// It returns an error if m is nil, if marshaling fails, if the nonce contains
// a path separator, or if the write itself fails.
func (m *RunManifest) WriteToDir(dir string) error {
	if m == nil {
		return fmt.Errorf("run manifest is nil")
	}
	data, err := json.MarshalIndent(m, "", "  ")
	if err != nil {
		return fmt.Errorf("marshal run manifest: %w", err)
	}

	// Use nonce-based filename if Environment.ManifestNonce is set
	manifestPath := ManifestPath(dir)
	if m.Environment != nil && m.Environment.ManifestNonce != "" {
		nonce := m.Environment.ManifestNonce
		// The nonce can come from a manifest that was loaded from disk, so it
		// is not trusted: a separator would let filepath.Join resolve the
		// target outside dir (e.g. "x/../../../etc/passwd").
		if strings.ContainsAny(nonce, `/\`) {
			return fmt.Errorf("write run manifest: invalid manifest nonce %q", nonce)
		}
		manifestPath = ManifestPathWithNonce(dir, nonce)
	}

	if err := fileutil.SecureFileWrite(manifestPath, data, 0640); err != nil {
		return fmt.Errorf("write run manifest: %w", err)
	}
	return nil
}

// LoadFromDir reads and parses the run manifest stored in dir.
//
// The fixed filename run_manifest.json is tried first. If it cannot be read,
// the directory is listed and the first entry named run_manifest_*.json that
// can be read is used instead (os.ReadDir yields entries sorted by filename).
//
// A "read run manifest" error is returned when no candidate could be read:
// it wraps the original read error if the directory cannot be listed or
// contains no nonce-based candidate, and otherwise the error from the last
// candidate tried. A "parse run manifest" error is returned when the file
// contents are not valid manifest JSON.
func LoadFromDir(dir string) (*RunManifest, error) {
	raw, readErr := fileutil.SecureFileRead(ManifestPath(dir))
	if readErr != nil {
		entries, listErr := os.ReadDir(dir)
		if listErr != nil {
			// Report the original read failure, not the listing failure.
			return nil, fmt.Errorf("read run manifest: %w", readErr)
		}

		for _, entry := range entries {
			name := entry.Name()
			if !strings.HasPrefix(name, "run_manifest_") || !strings.HasSuffix(name, ".json") {
				continue
			}
			raw, readErr = fileutil.SecureFileRead(filepath.Join(dir, name))
			if readErr == nil {
				break
			}
		}

		if readErr != nil {
			return nil, fmt.Errorf("read run manifest: %w", readErr)
		}
	}

	manifest := new(RunManifest)
	if err := json.Unmarshal(raw, manifest); err != nil {
		return nil, fmt.Errorf("parse run manifest: %w", err)
	}
	return manifest, nil
}

// MarkStarted records t as the time the run started.
// It is a no-op on a nil manifest, matching AddAnnotation and
// ApplyNarrativePatch.
func (m *RunManifest) MarkStarted(t time.Time) {
	if m == nil {
		return
	}
	m.StartedAt = t
}

// MarkFinished records the end of a run.
//
// It sets EndedAt to t and ExitCode to exitCode (which may be nil), and stores
// execErr's message in Error, clearing Error when execErr is nil. When
// StartedAt has been set, TotalDurationMS becomes the elapsed milliseconds
// between StartedAt and t; otherwise TotalDurationMS is left untouched.
//
// It is a no-op on a nil manifest, matching AddAnnotation and
// ApplyNarrativePatch.
func (m *RunManifest) MarkFinished(t time.Time, exitCode *int, execErr error) {
	if m == nil {
		return
	}
	m.EndedAt = t
	m.ExitCode = exitCode
	m.Error = ""
	if execErr != nil {
		m.Error = execErr.Error()
	}
	if !m.StartedAt.IsZero() {
		m.TotalDurationMS = t.Sub(m.StartedAt).Milliseconds()
	}
}

// AddAnnotation appends a note to the manifest's annotations, stamped with ts.
// Author and note are trimmed of surrounding whitespace. Nothing is appended
// when the manifest is nil or the trimmed note is empty.
func (m *RunManifest) AddAnnotation(ts time.Time, author, note string) {
	if m == nil {
		return
	}
	if note = strings.TrimSpace(note); note == "" {
		return
	}
	m.Annotations = append(m.Annotations, Annotation{
		Timestamp: ts,
		Author:    strings.TrimSpace(author),
		Note:      note,
	})
}

// ApplyNarrativePatch merges p into the manifest's Narrative, creating the
// Narrative first if the manifest has none.
//
// Each non-nil string field of p replaces the corresponding Narrative field
// with its whitespace-trimmed value; nil fields leave the current value
// untouched. A non-nil Tags replaces the whole tag list with the trimmed,
// non-empty entries of the patch (possibly an empty list).
//
// It is a no-op on a nil manifest.
func (m *RunManifest) ApplyNarrativePatch(p NarrativePatch) {
	if m == nil {
		return
	}
	if m.Narrative == nil {
		m.Narrative = &Narrative{}
	}
	target := m.Narrative

	// assign copies the trimmed patch value into dst when the patch sets it.
	assign := func(dst, src *string) {
		if src != nil {
			*dst = strings.TrimSpace(*src)
		}
	}
	assign(&target.Hypothesis, p.Hypothesis)
	assign(&target.Context, p.Context)
	assign(&target.Intent, p.Intent)
	assign(&target.ExpectedOutcome, p.ExpectedOutcome)
	assign(&target.ParentRun, p.ParentRun)
	assign(&target.ExperimentGroup, p.ExperimentGroup)

	if p.Tags == nil {
		return
	}
	tags := make([]string, 0, len(*p.Tags))
	for _, raw := range *p.Tags {
		if tag := strings.TrimSpace(raw); tag != "" {
			tags = append(tags, tag)
		}
	}
	target.Tags = tags
}

// Sign signs the manifest using the provided signer and stores the resulting
// signature, key ID and algorithm in Signature, SignerKeyID and SigAlg.
//
// It returns an error if the manifest or the signer is nil, or if the signer
// fails; in all of those cases the manifest's signature fields are left
// unchanged.
func (m *RunManifest) Sign(signer *crypto.ManifestSigner) error {
	if m == nil {
		return fmt.Errorf("cannot sign nil manifest")
	}
	// Report a missing signer as an error instead of calling a method on a
	// nil *crypto.ManifestSigner.
	if signer == nil {
		return fmt.Errorf("cannot sign manifest with nil signer")
	}

	result, err := signer.SignManifest(m)
	if err != nil {
		return fmt.Errorf("failed to sign manifest: %w", err)
	}

	m.Signature = result.Signature
	m.SignerKeyID = result.KeyID
	m.SigAlg = result.Algorithm
	return nil
}

// Verify checks the manifest's stored signature against publicKey.
//
// The Signature, SignerKeyID and SigAlg fields are repackaged as a
// crypto.SigningResult and handed, together with the manifest itself, to
// crypto.VerifyManifest, whose result is returned as-is. An error is returned
// without calling into the crypto package when the manifest is nil or carries
// no signature.
func (m *RunManifest) Verify(publicKey []byte) (bool, error) {
	switch {
	case m == nil:
		return false, fmt.Errorf("cannot verify nil manifest")
	case m.Signature == "":
		return false, fmt.Errorf("manifest has no signature")
	}

	return crypto.VerifyManifest(m, &crypto.SigningResult{
		Signature: m.Signature,
		KeyID:     m.SignerKeyID,
		Algorithm: m.SigAlg,
	}, publicKey)
}

// IsSigned reports whether the manifest carries a non-empty signature.
// A nil manifest is reported as unsigned.
func (m *RunManifest) IsSigned() bool {
	if m == nil {
		return false
	}
	return m.Signature != ""
}

// Validate checks manifest completeness by delegating to Validate on a
// Validator freshly obtained from NewValidator, so both entry points stay
// consistent.
func (m *RunManifest) Validate() error {
	return NewValidator().Validate(m)
}

// ValidateStrict performs strict validation, including optional provenance
// fields, by delegating to ValidateStrict on a Validator freshly obtained
// from NewValidator.
func (m *RunManifest) ValidateStrict() error {
	return NewValidator().ValidateStrict(m)
}