fetch_ml/internal/manifest/run_manifest.go

package manifest

import (
	"crypto/rand"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/jfraeys/fetch_ml/internal/crypto"
	"github.com/jfraeys/fetch_ml/internal/fileutil"
)

// Filename conventions shared by the manifest path helpers in this file.
const (
	// runManifestFilename is the fixed (legacy) manifest filename used when no
	// nonce is available.
	runManifestFilename = "run_manifest.json"
	// manifestNonceLength is the number of random bytes in a nonce; hex
	// encoding doubles that to the character count noted below.
	manifestNonceLength = 16 // 32 hex chars
)

// GenerateManifestNonce generates a cryptographically secure nonce for manifest filenames.
// This prevents information disclosure in multi-tenant environments where predictable
// filenames could be enumerated.
func GenerateManifestNonce() (string, error) {
	nonce := make([]byte, manifestNonceLength)
	if _, err := rand.Read(nonce); err != nil {
		return "", fmt.Errorf("failed to generate manifest nonce: %w", err)
	}
	return hex.EncodeToString(nonce), nil
}

// GenerateManifestFilename builds a manifest filename of the form
// run_manifest_<nonce>.json, where <nonce> comes from GenerateManifestNonce.
// Any error from nonce generation is returned unchanged with an empty name.
func GenerateManifestFilename() (string, error) {
	const pattern = "run_manifest_%s.json"

	nonce, nonceErr := GenerateManifestNonce()
	if nonceErr != nil {
		return "", nonceErr
	}
	name := fmt.Sprintf(pattern, nonce)
	return name, nil
}

// ParseManifestFilename returns the portion of filename that sits between the
// "run_manifest_" prefix and the ".json" suffix. It returns the empty string
// when either the prefix or the suffix is absent (which includes the legacy
// "run_manifest.json" name). The extracted text is not validated as hex.
func ParseManifestFilename(filename string) string {
	const (
		prefix = "run_manifest_"
		suffix = ".json"
	)
	if strings.HasPrefix(filename, prefix) && strings.HasSuffix(filename, suffix) {
		return strings.TrimSuffix(strings.TrimPrefix(filename, prefix), suffix)
	}
	return ""
}

// Annotation is a timestamped note attached to a run manifest, optionally
// attributed to an author.
type Annotation struct {
	Timestamp time.Time `json:"timestamp"`
	Author    string    `json:"author,omitempty"`
	Note      string    `json:"note"`
}

// UnmarshalJSON decodes an annotation, accepting the timestamp under either
// the "timestamp" key or the alternative "ts" key. When both are present,
// "timestamp" wins. When neither is present, a.Timestamp is left as it was.
// Author and Note are always overwritten with the decoded values.
func (a *Annotation) UnmarshalJSON(data []byte) error {
	// Pointer-typed timestamps let us tell "absent" apart from "zero".
	var wire struct {
		Timestamp *time.Time `json:"timestamp,omitempty"`
		TS        *time.Time `json:"ts,omitempty"`
		Author    string     `json:"author,omitempty"`
		Note      string     `json:"note"`
	}
	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}

	switch {
	case wire.Timestamp != nil:
		a.Timestamp = *wire.Timestamp
	case wire.TS != nil:
		a.Timestamp = *wire.TS
	}
	a.Author, a.Note = wire.Author, wire.Note
	return nil
}

// Narrative holds the human-written description of a run: what was being
// tested, why, and how it relates to other runs. Every field is optional and
// is omitted from the JSON encoding when empty.
type Narrative struct {
	Hypothesis      string   `json:"hypothesis,omitempty"`
	Context         string   `json:"context,omitempty"`
	Intent          string   `json:"intent,omitempty"`
	ExpectedOutcome string   `json:"expected_outcome,omitempty"`
	ParentRun       string   `json:"parent_run,omitempty"`
	ExperimentGroup string   `json:"experiment_group,omitempty"`
	Tags            []string `json:"tags,omitempty"`
}

// NarrativePatch is a partial update to a Narrative. Each field is a pointer
// so that "not provided" (nil) can be told apart from "set to empty":
// RunManifest.ApplyNarrativePatch leaves a Narrative field untouched when the
// matching patch field is nil and overwrites it otherwise.
type NarrativePatch struct {
	Hypothesis      *string   `json:"hypothesis,omitempty"`
	Context         *string   `json:"context,omitempty"`
	Intent          *string   `json:"intent,omitempty"`
	ExpectedOutcome *string   `json:"expected_outcome,omitempty"`
	ParentRun       *string   `json:"parent_run,omitempty"`
	ExperimentGroup *string   `json:"experiment_group,omitempty"`
	Tags            *[]string `json:"tags,omitempty"`
}

// Outcome represents the documented result of a run. All fields are optional
// and omitted from the JSON encoding when empty. The value sets and limits
// mentioned in the field comments are conventions; nothing in this file
// enforces them.
type Outcome struct {
	Status        string   `json:"status,omitempty"`         // validated, invalidated, inconclusive, partial
	Summary       string   `json:"summary,omitempty"`        // Brief description
	KeyLearnings  []string `json:"key_learnings,omitempty"`  // 3-5 bullet points max
	FollowUpRuns  []string `json:"follow_up_runs,omitempty"` // References to related runs
	ArtifactsUsed []string `json:"artifacts_used,omitempty"` // e.g., ["model.pt", "metrics.json"]
}

// ArtifactFile describes one artifact file recorded in a manifest: its path,
// its size in bytes, and its modification time.
// NOTE(review): whether Path is relative to the run directory is not
// established in this file — confirm against the artifact scanner.
type ArtifactFile struct {
	Modified  time.Time `json:"modified"`
	Path      string    `json:"path"`
	SizeBytes int64     `json:"size_bytes"`
}

// Artifacts is the artifact section of a manifest: the files that were found,
// the paths that were excluded (with reasons), and when discovery happened.
// NOTE(review): TotalSizeBytes is presumably the sum of Files[i].SizeBytes;
// it is not computed in this file — verify against the producer.
type Artifacts struct {
	DiscoveryTime  time.Time      `json:"discovery_time"`
	Files          []ArtifactFile `json:"files,omitempty"`
	Exclusions     []Exclusion    `json:"exclusions,omitempty"`
	TotalSizeBytes int64          `json:"total_size_bytes,omitempty"`
}

// Exclusion records why a path was excluded from artifact scanning.
// Both fields are always emitted in the JSON encoding.
type Exclusion struct {
	Path   string `json:"path"`
	Reason string `json:"reason"`
}

// ExecutionEnvironment captures the runtime environment for reproducibility.
// This enables reconstruction and comparison of runs.
//
// RunManifest.ValidateProvenance requires ConfigHash, GPUDetectionMethod and
// SandboxNetworkMode to be non-empty. ManifestNonce, when set, makes
// RunManifest.WriteToDir use the nonce-based filename instead of the fixed
// one.
type ExecutionEnvironment struct {
	Metadata           map[string]string `json:"metadata,omitempty"`
	ConfigHash         string            `json:"config_hash"`
	GPUDetectionMethod string            `json:"gpu_detection_method,omitempty"`
	GPUVendor          string            `json:"gpu_vendor,omitempty"`
	PodmanCPUs         string            `json:"podman_cpus,omitempty"`
	SandboxNetworkMode string            `json:"sandbox_network_mode"`
	SandboxSeccomp     string            `json:"sandbox_seccomp,omitempty"`
	ComplianceMode     string            `json:"compliance_mode,omitempty"`
	ManifestNonce      string            `json:"manifest_nonce,omitempty"`
	GPUCount           int               `json:"gpu_count"`
	MaxWorkers         int               `json:"max_workers"`
	SandboxNoNewPrivs  bool              `json:"sandbox_no_new_privs"`
}

// RunManifest is a best-effort, self-contained provenance record for a run.
// WriteToDir writes it to <run_dir>/run_manifest.json, or to
// <run_dir>/run_manifest_<nonce>.json when Environment.ManifestNonce is set.
//
// NOTE(review): encoding/json's omitempty never omits struct values, so the
// zero time.Time in StartedAt/EndedAt is still emitted despite the tag —
// confirm that is acceptable to consumers.
type RunManifest struct {
	CreatedAt             time.Time             `json:"created_at"`
	StartedAt             time.Time             `json:"started_at,omitempty"`
	EndedAt               time.Time             `json:"ended_at,omitempty"`
	Artifacts             *Artifacts            `json:"artifacts,omitempty"`
	Environment           *ExecutionEnvironment `json:"environment,omitempty"`
	Metadata              map[string]string     `json:"metadata,omitempty"`
	ExitCode              *int                  `json:"exit_code,omitempty"`
	Narrative             *Narrative            `json:"narrative,omitempty"`
	Outcome               *Outcome              `json:"outcome,omitempty"`
	PodmanImage           string                `json:"podman_image,omitempty"`
	SnapshotID            string                `json:"snapshot_id,omitempty"`
	ExperimentManifestSHA string                `json:"experiment_manifest_sha,omitempty"`
	DepsManifestName      string                `json:"deps_manifest_name,omitempty"`
	DepsManifestSHA       string                `json:"deps_manifest_sha,omitempty"`
	TrainScriptPath       string                `json:"train_script_path,omitempty"`
	WorkerVersion         string                `json:"worker_version,omitempty"`
	RunID                 string                `json:"run_id"`
	ImageDigest           string                `json:"image_digest,omitempty"`
	WorkerHost            string                `json:"worker_host,omitempty"`
	SnapshotSHA256        string                `json:"snapshot_sha256,omitempty"`
	Command               string                `json:"command,omitempty"`
	Args                  string                `json:"args,omitempty"`
	CommitID              string                `json:"commit_id,omitempty"`
	// Error holds the execution error text recorded by MarkFinished, if any.
	Error                 string                `json:"error,omitempty"`
	// SigAlg, SignerKeyID and Signature are populated by Sign and read by Verify.
	SigAlg                string                `json:"sig_alg,omitempty"`
	SignerKeyID           string                `json:"signer_key_id,omitempty"`
	Signature             string                `json:"signature,omitempty"`
	TaskID                string                `json:"task_id"`
	JobName               string                `json:"job_name"`
	Annotations           []Annotation          `json:"annotations,omitempty"`
	GPUDevices            []string              `json:"gpu_devices,omitempty"`
	// TotalDurationMS is set by MarkFinished as EndedAt minus StartedAt; the
	// other duration fields are not written anywhere in this file.
	TotalDurationMS       int64                 `json:"total_duration_ms,omitempty"`
	FinalizeDurationMS    int64                 `json:"finalize_duration_ms,omitempty"`
	ExecutionDurationMS   int64                 `json:"execution_duration_ms,omitempty"`
	StagingDurationMS     int64                 `json:"staging_duration_ms,omitempty"`
}

// NewRunManifest returns a manifest populated with the given identifiers and
// creation time. Metadata is initialised to an empty, non-nil map so callers
// can assign into it immediately; every other field is left at its zero value.
func NewRunManifest(runID, taskID, jobName string, createdAt time.Time) *RunManifest {
	return &RunManifest{
		CreatedAt: createdAt,
		Metadata:  map[string]string{},
		RunID:     runID,
		TaskID:    taskID,
		JobName:   jobName,
	}
}

// ManifestPath returns the path of the fixed-name (legacy) manifest file
// inside dir.
//
// Deprecated: Use ManifestPathWithNonce for new code to support unique filenames.
func ManifestPath(dir string) string {
	legacyPath := filepath.Join(dir, runManifestFilename)
	return legacyPath
}

// ManifestPathWithNonce returns the manifest path inside dir for the given
// nonce, i.e. <dir>/run_manifest_<nonce>.json. An empty nonce selects the
// fixed default filename instead. The nonce is used verbatim; it is not
// validated here.
func ManifestPathWithNonce(dir, nonce string) string {
	name := runManifestFilename
	if nonce != "" {
		name = fmt.Sprintf("run_manifest_%s.json", nonce)
	}
	return filepath.Join(dir, name)
}

// WriteToDir serialises the manifest as indented JSON and writes it into dir
// via fileutil.SecureFileWrite with mode 0640. The filename is nonce-based
// when Environment.ManifestNonce is set and the fixed default otherwise.
// It returns an error for a nil receiver, a marshalling failure, or a write
// failure.
func (m *RunManifest) WriteToDir(dir string) error {
	if m == nil {
		return fmt.Errorf("run manifest is nil")
	}

	encoded, err := json.MarshalIndent(m, "", "  ")
	if err != nil {
		return fmt.Errorf("marshal run manifest: %w", err)
	}

	// ManifestPathWithNonce falls back to the default filename for an empty
	// nonce, so a missing Environment and an unset nonce are handled alike.
	var nonce string
	if env := m.Environment; env != nil {
		nonce = env.ManifestNonce
	}
	target := ManifestPathWithNonce(dir, nonce)

	if err = fileutil.SecureFileWrite(target, encoded, 0640); err != nil {
		return fmt.Errorf("write run manifest: %w", err)
	}
	return nil
}

// LoadFromDir reads and parses the run manifest stored in dir.
//
// The fixed default filename is tried first. If that read fails for any
// reason, the directory is listed and each entry named
// run_manifest_*.json is tried in listing order until one can be read.
// The error returned is the default-file error when the directory cannot be
// listed or contains no candidates, and otherwise the error from the last
// candidate that failed. A parse failure is reported separately.
func LoadFromDir(dir string) (*RunManifest, error) {
	raw, readErr := fileutil.SecureFileRead(ManifestPath(dir))
	if readErr != nil {
		listing, listErr := os.ReadDir(dir)
		if listErr != nil {
			// Report the original read failure, not the listing failure.
			return nil, fmt.Errorf("read run manifest: %w", readErr)
		}

		for _, item := range listing {
			name := item.Name()
			if !strings.HasPrefix(name, "run_manifest_") || !strings.HasSuffix(name, ".json") {
				continue
			}
			raw, readErr = fileutil.SecureFileRead(filepath.Join(dir, name))
			if readErr == nil {
				break
			}
		}

		if readErr != nil {
			return nil, fmt.Errorf("read run manifest: %w", readErr)
		}
	}

	loaded := new(RunManifest)
	if parseErr := json.Unmarshal(raw, loaded); parseErr != nil {
		return nil, fmt.Errorf("parse run manifest: %w", parseErr)
	}
	return loaded, nil
}

// MarkStarted records t as the time the run started. It is a no-op on a nil
// manifest, matching the nil-receiver handling of the other mutators on
// RunManifest (AddAnnotation, ApplyNarrativePatch).
func (m *RunManifest) MarkStarted(t time.Time) {
	if m == nil {
		return
	}
	m.StartedAt = t
}

// MarkFinished records the end of a run.
//
// It stores t as EndedAt and exitCode as ExitCode (nil is kept as nil). Error
// is set to execErr's message, or cleared when execErr is nil. When StartedAt
// has been set, TotalDurationMS becomes the elapsed milliseconds between
// StartedAt and t; otherwise TotalDurationMS is left unchanged.
//
// It is a no-op on a nil manifest, matching the nil-receiver handling of the
// other mutators on RunManifest (AddAnnotation, ApplyNarrativePatch).
func (m *RunManifest) MarkFinished(t time.Time, exitCode *int, execErr error) {
	if m == nil {
		return
	}
	m.EndedAt = t
	m.ExitCode = exitCode

	m.Error = ""
	if execErr != nil {
		m.Error = execErr.Error()
	}

	// Without a recorded start there is no meaningful duration to compute.
	if !m.StartedAt.IsZero() {
		m.TotalDurationMS = t.Sub(m.StartedAt).Milliseconds()
	}
}

// AddAnnotation appends a note to the manifest's annotations. Leading and
// trailing whitespace is stripped from both author and note. Nothing is
// appended when the receiver is nil or the note is empty after trimming.
func (m *RunManifest) AddAnnotation(ts time.Time, author, note string) {
	if m == nil {
		return
	}
	body := strings.TrimSpace(note)
	if len(body) == 0 {
		return
	}
	m.Annotations = append(m.Annotations, Annotation{
		Timestamp: ts,
		Author:    strings.TrimSpace(author),
		Note:      body,
	})
}

// ApplyNarrativePatch merges p into the manifest's Narrative, creating the
// Narrative first if it is nil. For each string field, a nil pointer in p
// leaves the current value alone and a non-nil pointer replaces it with the
// whitespace-trimmed value. A non-nil Tags replaces the tag list with the
// trimmed, non-empty entries (possibly an empty, non-nil slice). It is a
// no-op on a nil receiver.
func (m *RunManifest) ApplyNarrativePatch(p NarrativePatch) {
	if m == nil {
		return
	}
	if m.Narrative == nil {
		m.Narrative = new(Narrative)
	}
	nar := m.Narrative

	// assign overwrites *dst with the trimmed *src when src was provided.
	assign := func(dst, src *string) {
		if src != nil {
			*dst = strings.TrimSpace(*src)
		}
	}
	assign(&nar.Hypothesis, p.Hypothesis)
	assign(&nar.Context, p.Context)
	assign(&nar.Intent, p.Intent)
	assign(&nar.ExpectedOutcome, p.ExpectedOutcome)
	assign(&nar.ParentRun, p.ParentRun)
	assign(&nar.ExperimentGroup, p.ExperimentGroup)

	if p.Tags == nil {
		return
	}
	kept := make([]string, 0, len(*p.Tags))
	for _, raw := range *p.Tags {
		if tag := strings.TrimSpace(raw); tag != "" {
			kept = append(kept, tag)
		}
	}
	nar.Tags = kept
}

// Sign signs the manifest using the provided signer and stores the resulting
// signature, key ID and algorithm in Signature, SignerKeyID and SigAlg.
//
// It returns an error when the manifest or the signer is nil, or when the
// signer fails; in those cases the manifest's signature fields are left
// untouched.
func (m *RunManifest) Sign(signer *crypto.ManifestSigner) error {
	if m == nil {
		return fmt.Errorf("cannot sign nil manifest")
	}
	// Reject a nil signer up front instead of calling a method on it.
	if signer == nil {
		return fmt.Errorf("cannot sign manifest with nil signer")
	}

	result, err := signer.SignManifest(m)
	if err != nil {
		return fmt.Errorf("failed to sign manifest: %w", err)
	}

	m.Signature = result.Signature
	m.SignerKeyID = result.KeyID
	m.SigAlg = result.Algorithm
	return nil
}

// Verify checks the manifest's stored signature against publicKey by
// delegating to crypto.VerifyManifest, passing the manifest's Signature,
// SignerKeyID and SigAlg as the signing result. It returns an error without
// calling into crypto when the manifest is nil or carries no signature.
func (m *RunManifest) Verify(publicKey []byte) (bool, error) {
	switch {
	case m == nil:
		return false, fmt.Errorf("cannot verify nil manifest")
	case m.Signature == "":
		return false, fmt.Errorf("manifest has no signature")
	}

	// Reassemble the signing result from the fields Sign stored.
	stored := crypto.SigningResult{
		Algorithm: m.SigAlg,
		KeyID:     m.SignerKeyID,
		Signature: m.Signature,
	}
	return crypto.VerifyManifest(m, &stored, publicKey)
}

// IsSigned reports whether the manifest carries a non-empty signature.
// A nil manifest is reported as unsigned.
func (m *RunManifest) IsSigned() bool {
	if m == nil {
		return false
	}
	return len(m.Signature) > 0
}

// Validate runs the standard validation on the manifest by handing it to a
// fresh Validator's Validate method, so the rules live in one place.
func (m *RunManifest) Validate() error {
	return NewValidator().Validate(m)
}

// ValidateStrict runs strict validation, which also covers optional
// provenance fields, by handing the manifest to a fresh Validator's
// ValidateStrict method.
func (m *RunManifest) ValidateStrict() error {
	return NewValidator().ValidateStrict(m)
}

// ValidateProvenance reports whether the manifest carries a complete
// provenance record. CanWrite requires this to pass unless best-effort mode
// is enabled.
//
// The following must be present: Environment (with non-empty ConfigHash,
// GPUDetectionMethod and SandboxNetworkMode), Artifacts (non-nil; it may list
// no files), RunID and TaskID. On failure the error lists every missing item
// in that order. A nil manifest is an error.
func (m *RunManifest) ValidateProvenance() error {
	if m == nil {
		return fmt.Errorf("manifest is nil")
	}

	var gaps []string
	// require records label as missing when absent is true.
	require := func(absent bool, label string) {
		if absent {
			gaps = append(gaps, label)
		}
	}

	if env := m.Environment; env == nil {
		gaps = append(gaps, "environment metadata")
	} else {
		require(env.ConfigHash == "", "config_hash") // critical for reproducibility
		require(env.GPUDetectionMethod == "", "gpu_detection_method")
		require(env.SandboxNetworkMode == "", "sandbox_network_mode")
	}

	// Artifacts only has to exist; an empty listing is fine for new runs.
	require(m.Artifacts == nil, "artifacts metadata")

	// Basic run identification.
	require(m.RunID == "", "run_id")
	require(m.TaskID == "", "task_id")

	if len(gaps) == 0 {
		return nil
	}
	return fmt.Errorf("incomplete provenance record: missing %v", gaps)
}

// CanWrite reports whether the manifest may be written under the given
// provenance policy. Basic validation (Validate) always runs. When
// provenanceBestEffort is false, ValidateProvenance must also pass
// (fail-closed); when it is true, a partial manifest is accepted. This
// method itself emits no warnings.
func (m *RunManifest) CanWrite(provenanceBestEffort bool) error {
	if err := m.Validate(); err != nil {
		return fmt.Errorf("manifest validation failed: %w", err)
	}

	if !provenanceBestEffort {
		if err := m.ValidateProvenance(); err != nil {
			return fmt.Errorf("provenance validation failed (set provenance_best_effort: true to allow partial manifests): %w", err)
		}
	}
	return nil
}