Implement comprehensive audit and security infrastructure: - Immutable audit logs with platform-specific backends (Linux/Other) - Sealed log entries with tamper-evident checksums - Audit alert system for real-time security notifications - Log rotation with retention policies - Checkpoint-based audit verification Add multi-tenant security features: - Tenant manager with quota enforcement - Middleware for tenant authentication/authorization - Per-tenant cryptographic key isolation - Supply chain security for container verification - Cross-platform secure file utilities (Unix/Windows) Add test coverage: - Unit tests for audit alerts and sealed logs - Platform-specific audit backend tests
507 lines
14 KiB
Go
507 lines
14 KiB
Go
package audit
|
|
|
|
import (
|
|
"bufio"
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/logging"
|
|
)
|
|
|
|
// EventType represents the type of audit event. Its string value is what is
// serialized into the "event_type" field of an Event.
type EventType string

// Supported audit event types.
const (
	// Authentication events.
	EventAuthAttempt EventType = "authentication_attempt"
	EventAuthSuccess EventType = "authentication_success"
	EventAuthFailure EventType = "authentication_failure"

	// Job lifecycle events.
	EventJobQueued    EventType = "job_queued"
	EventJobStarted   EventType = "job_started"
	EventJobCompleted EventType = "job_completed"
	EventJobFailed    EventType = "job_failed"

	// Jupyter service events.
	EventJupyterStart EventType = "jupyter_start"
	EventJupyterStop  EventType = "jupyter_stop"

	// Experiment events.
	EventExperimentCreated EventType = "experiment_created"
	EventExperimentDeleted EventType = "experiment_deleted"

	// HIPAA-specific file access events
	EventFileRead      EventType = "file_read"
	EventFileWrite     EventType = "file_write"
	EventFileDelete    EventType = "file_delete"
	EventDatasetAccess EventType = "dataset_access"
)
|
|
|
|
// Event represents an audit log event with integrity chain.
//
// Logger.Log overwrites Timestamp, SequenceNum, PrevHash and EventHash; the
// remaining fields are supplied by the caller.
//
// SECURITY NOTE: EventHash is computed over the JSON encoding of the event
// (see CalculateEventHash), so the declaration order of the fields below and
// their JSON tags are part of the hash format. Metadata is a map[string]any;
// encoding/json marshals map keys in sorted order, which keeps the encoding,
// and therefore the hash, deterministic. If you need to hash events
// externally, ensure the same ordering is used, or exclude Metadata from the
// hashed portion.
type Event struct {
	// Timestamp is set by Logger.Log to the current UTC time.
	Timestamp time.Time `json:"timestamp"`
	// Metadata carries optional free-form key/value context.
	Metadata map[string]any `json:"metadata,omitempty"`
	// EventType classifies the event.
	EventType EventType `json:"event_type"`
	// UserID identifies the acting user, if known.
	UserID string `json:"user_id,omitempty"`
	// IPAddress is the client address associated with the event, if known.
	IPAddress string `json:"ip_address,omitempty"`
	// Resource names what was acted upon; the logging helpers store a file
	// path, job ID or service ID here.
	Resource string `json:"resource,omitempty"`
	// Action is a short verb describing the operation (for example "read").
	Action string `json:"action,omitempty"`
	// ErrorMsg holds the failure reason; serialized as "error".
	ErrorMsg string `json:"error,omitempty"`
	// PrevHash is the EventHash of the preceding event; empty for the first.
	PrevHash string `json:"prev_hash,omitempty"`
	// EventHash is the hex SHA-256 of this event encoded with EventHash cleared.
	EventHash string `json:"event_hash,omitempty"`
	// SequenceNum is the 1-based position of the event in the chain.
	SequenceNum int64 `json:"sequence_num,omitempty"`
	// Success reports whether the audited operation succeeded; always emitted.
	Success bool `json:"success"`
}
|
|
|
|
// Logger handles audit logging with integrity chain.
//
// Log and Close take mu, so a single Logger may be shared between goroutines.
// The struct contains a sync.Mutex and must not be copied; use *Logger.
type Logger struct {
	// file is the append-only audit log handle; nil when no file is configured.
	file *os.File
	// logger is an optional structured logger used for diagnostics and for
	// mirroring audit events; nil disables both.
	logger *logging.Logger
	// filePath is the validated, resolved path of the audit log file.
	filePath string
	// lastHash is the EventHash of the most recent event in the chain.
	lastHash string
	// sequenceNum is the sequence number of the most recent event in the chain.
	sequenceNum int64
	// mu guards file, lastHash and sequenceNum.
	mu sync.Mutex
	// enabled turns Log into a no-op when false.
	enabled bool
}
|
|
|
|
// NewLogger creates a new audit logger with secure path validation.
|
|
// It validates the filePath for path traversal, symlink attacks, and ensures
|
|
// it stays within the base directory (/var/lib/fetch_ml/audit).
|
|
func NewLogger(enabled bool, filePath string, logger *logging.Logger) (*Logger, error) {
|
|
return NewLoggerWithBase(enabled, filePath, logger, "/var/lib/fetch_ml/audit")
|
|
}
|
|
|
|
// NewLoggerWithBase creates a new audit logger with a configurable base directory.
|
|
// This is useful for testing. For production, use NewLogger which uses the default base.
|
|
func NewLoggerWithBase(enabled bool, filePath string, logger *logging.Logger, baseDir string) (*Logger, error) {
|
|
al := &Logger{
|
|
enabled: enabled,
|
|
logger: logger,
|
|
}
|
|
|
|
if !enabled || filePath == "" {
|
|
return al, nil
|
|
}
|
|
|
|
// Use secure path validation
|
|
fullPath, err := validateAndSecurePath(filePath, baseDir)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid audit log path: %w", err)
|
|
}
|
|
|
|
// Check if file is a symlink (security check)
|
|
if err := checkFileNotSymlink(fullPath); err != nil {
|
|
return nil, fmt.Errorf("audit log security check failed: %w", err)
|
|
}
|
|
|
|
if err := os.MkdirAll(filepath.Dir(fullPath), 0o700); err != nil {
|
|
return nil, fmt.Errorf("failed to create audit directory: %w", err)
|
|
}
|
|
|
|
file, err := os.OpenFile(fullPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o600)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open audit log file: %w", err)
|
|
}
|
|
|
|
al.file = file
|
|
al.filePath = fullPath
|
|
|
|
// Restore chain state from existing log to prevent integrity break on restart
|
|
if err := al.resumeFromFile(); err != nil {
|
|
file.Close()
|
|
return nil, fmt.Errorf("failed to resume audit chain: %w", err)
|
|
}
|
|
|
|
return al, nil
|
|
}
|
|
|
|
// Log logs an audit event with integrity chain
|
|
func (al *Logger) Log(event Event) {
|
|
if !al.enabled {
|
|
return
|
|
}
|
|
|
|
event.Timestamp = time.Now().UTC()
|
|
|
|
al.mu.Lock()
|
|
defer al.mu.Unlock()
|
|
|
|
// Set sequence number and previous hash for integrity chain
|
|
al.sequenceNum++
|
|
event.SequenceNum = al.sequenceNum
|
|
event.PrevHash = al.lastHash
|
|
|
|
// Calculate hash of this event for tamper evidence
|
|
event.EventHash = al.CalculateEventHash(event)
|
|
al.lastHash = event.EventHash
|
|
|
|
// Marshal to JSON
|
|
data, err := json.Marshal(event)
|
|
if err != nil {
|
|
if al.logger != nil {
|
|
al.logger.Error("failed to marshal audit event", "error", err)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Write to file if configured
|
|
if al.file != nil {
|
|
_, err = al.file.Write(append(data, '\n'))
|
|
if err != nil && al.logger != nil {
|
|
al.logger.Error("failed to write audit event", "error", err)
|
|
}
|
|
// fsync ensures data is flushed to disk before updating hash in memory.
|
|
// Critical for crash safety: prevents chain inconsistency if system
|
|
// crashes after hash advance but before write completion.
|
|
if err == nil {
|
|
if syncErr := al.file.Sync(); syncErr != nil && al.logger != nil {
|
|
al.logger.Error("failed to sync audit log", "error", syncErr)
|
|
}
|
|
}
|
|
}
|
|
|
|
hashPreview := event.EventHash
|
|
if len(hashPreview) > 16 {
|
|
hashPreview = hashPreview[:16]
|
|
}
|
|
|
|
// Also log via structured logger
|
|
if al.logger != nil {
|
|
al.logger.Info("audit_event",
|
|
"event_type", event.EventType,
|
|
"user_id", event.UserID,
|
|
"resource", event.Resource,
|
|
"success", event.Success,
|
|
"seq", event.SequenceNum,
|
|
"hash", hashPreview,
|
|
)
|
|
}
|
|
}
|
|
|
|
// CalculateEventHash computes SHA-256 hash of event data for integrity chain
|
|
// Exported for testing purposes
|
|
func (al *Logger) CalculateEventHash(event Event) string {
|
|
eventCopy := event
|
|
eventCopy.EventHash = "" // keep PrevHash for chaining
|
|
|
|
data, err := json.Marshal(eventCopy)
|
|
if err != nil {
|
|
fallback := fmt.Sprintf(
|
|
"%s:%s:%d:%s",
|
|
event.Timestamp.UTC().Format(time.RFC3339Nano),
|
|
event.EventType,
|
|
event.SequenceNum,
|
|
event.PrevHash,
|
|
)
|
|
data = []byte(fallback)
|
|
}
|
|
|
|
hash := sha256.Sum256(data)
|
|
return hex.EncodeToString(hash[:])
|
|
}
|
|
|
|
// LogFileAccess logs a file access operation (HIPAA requirement)
|
|
func (al *Logger) LogFileAccess(
|
|
eventType EventType,
|
|
userID, filePath, ipAddr string,
|
|
success bool,
|
|
errMsg string,
|
|
) {
|
|
var action string
|
|
|
|
switch eventType {
|
|
case EventFileRead:
|
|
action = "read"
|
|
case EventFileWrite:
|
|
action = "write"
|
|
case EventFileDelete:
|
|
action = "delete"
|
|
case EventDatasetAccess:
|
|
action = "dataset_access"
|
|
default:
|
|
// Defensive: prevent silent misclassification
|
|
if al.logger != nil {
|
|
al.logger.Error(
|
|
"invalid file access event type",
|
|
"event_type", eventType,
|
|
)
|
|
}
|
|
return
|
|
}
|
|
|
|
al.Log(Event{
|
|
EventType: eventType,
|
|
UserID: userID,
|
|
IPAddress: ipAddr,
|
|
Resource: filePath,
|
|
Action: action,
|
|
Success: success,
|
|
ErrorMsg: errMsg,
|
|
})
|
|
}
|
|
|
|
// VerifyChain checks the integrity of the audit log chain.
|
|
// The events slice must be provided in ascending sequence order.
|
|
// Returns the first sequence number where tampering is detected, or -1 if valid.
|
|
func (al *Logger) VerifyChain(events []Event) (tamperedSeq int, err error) {
|
|
if len(events) == 0 {
|
|
return -1, nil
|
|
}
|
|
|
|
var expectedPrevHash string
|
|
|
|
for i, event := range events {
|
|
// Enforce strict sequence ordering (events must be sorted by SequenceNum)
|
|
if event.SequenceNum != int64(i+1) {
|
|
return int(event.SequenceNum), fmt.Errorf(
|
|
"sequence mismatch: expected %d, got %d",
|
|
i+1, event.SequenceNum,
|
|
)
|
|
}
|
|
|
|
if i == 0 {
|
|
if event.PrevHash != "" {
|
|
return int(event.SequenceNum), fmt.Errorf(
|
|
"first event must have empty prev_hash",
|
|
)
|
|
}
|
|
// Explicit check: first event must have SequenceNum == 1
|
|
if event.SequenceNum != 1 {
|
|
return int(event.SequenceNum), fmt.Errorf(
|
|
"first event must have sequence_num=1, got %d",
|
|
event.SequenceNum,
|
|
)
|
|
}
|
|
} else {
|
|
if event.PrevHash != expectedPrevHash {
|
|
return int(event.SequenceNum), fmt.Errorf(
|
|
"chain break at sequence %d",
|
|
event.SequenceNum,
|
|
)
|
|
}
|
|
}
|
|
|
|
expectedHash := al.CalculateEventHash(event)
|
|
if event.EventHash != expectedHash {
|
|
return int(event.SequenceNum), fmt.Errorf(
|
|
"hash mismatch at sequence %d",
|
|
event.SequenceNum,
|
|
)
|
|
}
|
|
|
|
expectedPrevHash = event.EventHash
|
|
}
|
|
|
|
return -1, nil
|
|
}
|
|
|
|
// LogAuthAttempt logs an authentication attempt
|
|
func (al *Logger) LogAuthAttempt(userID, ipAddr string, success bool, errMsg string) {
|
|
eventType := EventAuthSuccess
|
|
if !success {
|
|
eventType = EventAuthFailure
|
|
}
|
|
|
|
al.Log(Event{
|
|
EventType: eventType,
|
|
UserID: userID,
|
|
IPAddress: ipAddr,
|
|
Success: success,
|
|
ErrorMsg: errMsg,
|
|
})
|
|
}
|
|
|
|
// LogJobOperation logs a job-related operation
|
|
func (al *Logger) LogJobOperation(
|
|
eventType EventType,
|
|
userID, jobID, ipAddr string,
|
|
success bool,
|
|
errMsg string,
|
|
) {
|
|
al.Log(Event{
|
|
EventType: eventType,
|
|
UserID: userID,
|
|
IPAddress: ipAddr,
|
|
Resource: jobID,
|
|
Action: "job_operation",
|
|
Success: success,
|
|
ErrorMsg: errMsg,
|
|
})
|
|
}
|
|
|
|
// LogJupyterOperation logs a Jupyter service operation
|
|
func (al *Logger) LogJupyterOperation(
|
|
eventType EventType,
|
|
userID, serviceID, ipAddr string,
|
|
success bool,
|
|
errMsg string,
|
|
) {
|
|
al.Log(Event{
|
|
EventType: eventType,
|
|
UserID: userID,
|
|
IPAddress: ipAddr,
|
|
Resource: serviceID,
|
|
Action: "jupyter_operation",
|
|
Success: success,
|
|
ErrorMsg: errMsg,
|
|
})
|
|
}
|
|
|
|
// Close closes the audit logger
|
|
func (al *Logger) Close() error {
|
|
al.mu.Lock()
|
|
defer al.mu.Unlock()
|
|
|
|
if al.file != nil {
|
|
return al.file.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// resumeFromFile reads the last entry from the audit log file and restores
|
|
// the chain state (sequenceNum and lastHash) to prevent chain reset on restart.
|
|
// This is critical for tamper-evident logging integrity.
|
|
func (al *Logger) resumeFromFile() error {
|
|
if al.file == nil {
|
|
return nil
|
|
}
|
|
|
|
// Open file for reading to get the last entry
|
|
file, err := os.Open(al.filePath)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to open audit log for resume: %w", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
var lastEvent Event
|
|
scanner := bufio.NewScanner(file)
|
|
lineNum := 0
|
|
|
|
for scanner.Scan() {
|
|
lineNum++
|
|
line := scanner.Text()
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
var event Event
|
|
if err := json.Unmarshal([]byte(line), &event); err != nil {
|
|
// Corrupted line - log but continue
|
|
if al.logger != nil {
|
|
al.logger.Warn("corrupted audit log entry during resume",
|
|
"line", lineNum,
|
|
"error", err)
|
|
}
|
|
continue
|
|
}
|
|
lastEvent = event
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
return fmt.Errorf("error reading audit log during resume: %w", err)
|
|
}
|
|
|
|
// Restore chain state from last valid event
|
|
if lastEvent.SequenceNum > 0 {
|
|
al.sequenceNum = lastEvent.SequenceNum
|
|
al.lastHash = lastEvent.EventHash
|
|
if al.logger != nil {
|
|
al.logger.Info("audit chain resumed",
|
|
"sequence", al.sequenceNum,
|
|
"hash_preview", truncateHash(al.lastHash, 16))
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// truncateHash shortens hash to at most maxLen bytes so that only a preview
// of it appears in logs. Strings that already fit are returned unchanged.
func truncateHash(hash string, maxLen int) string {
	if maxLen < len(hash) {
		return hash[:maxLen]
	}
	return hash
}
|
|
|
|
// validateAndSecurePath validates a file path for security issues.
// It checks for path traversal, symlinks, and ensures the path stays within baseDir.
//
// filePath must be relative; it is cleaned and joined onto baseDir (with
// baseDir's own symlinks resolved when it exists). If the target already
// exists, its symlinks are resolved. If it does not, the deepest existing
// ancestor directory is resolved instead and the missing components are
// appended to it, so a symlinked directory anywhere along the path cannot be
// used to leave baseDir.
//
// Returns the resolved path, or an error if filePath is absolute, begins
// with "..", or resolves to a location outside baseDir.
func validateAndSecurePath(filePath, baseDir string) (string, error) {
	// Reject absolute paths
	if filepath.IsAbs(filePath) {
		return "", fmt.Errorf("absolute paths not allowed: %s", filePath)
	}

	// Clean the path to resolve any . or .. components. Anything that still
	// starts with ".." afterwards is trying to escape.
	cleanPath := filepath.Clean(filePath)
	if strings.HasPrefix(cleanPath, "..") {
		return "", fmt.Errorf("path traversal attempt detected: %s", filePath)
	}

	// Resolve base directory symlinks (critical for security)
	resolvedBase, err := filepath.EvalSymlinks(baseDir)
	if err != nil {
		// Base may not exist yet, use as-is but this is less secure.
		// Cleaned so the prefix comparisons below see a normalized path.
		resolvedBase = filepath.Clean(baseDir)
	}

	// withinBase reports whether p is the base directory or lies below it.
	// The prefix always ends in a separator so a sibling such as
	// "<base>-other" is not mistaken for a child of base.
	sep := string(os.PathSeparator)
	basePrefix := resolvedBase
	if !strings.HasSuffix(basePrefix, sep) {
		basePrefix += sep
	}
	withinBase := func(p string) bool {
		return p == resolvedBase || strings.HasPrefix(p, basePrefix)
	}

	// Construct full path
	fullPath := filepath.Join(resolvedBase, cleanPath)

	// Resolve any symlinks in the full path
	resolvedPath, err := filepath.EvalSymlinks(fullPath)
	if err != nil {
		// File doesn't exist yet - walk up to the deepest ancestor that
		// does and resolve that; rest collects the components below it.
		rest := filepath.Base(fullPath)
		dir := filepath.Dir(fullPath)
		for {
			resolvedDir, dirErr := filepath.EvalSymlinks(dir)
			if dirErr == nil {
				if !withinBase(resolvedDir) {
					return "", fmt.Errorf("parent directory escapes base: %s", filePath)
				}
				resolvedPath = filepath.Join(resolvedDir, rest)
				break
			}
			if dir == resolvedBase || !withinBase(dir) {
				// Nothing at or below the base could be resolved, so there
				// is nothing to follow; fall back to the lexical path and
				// let the final check decide.
				resolvedPath = fullPath
				break
			}
			rest = filepath.Join(filepath.Base(dir), rest)
			dir = filepath.Dir(dir)
		}
	}

	// Final verification: resolved path must be within base directory
	if !withinBase(resolvedPath) {
		return "", fmt.Errorf("path escapes base directory after symlink resolution: %s", filePath)
	}

	return resolvedPath, nil
}
|
|
|
|
// checkFileNotSymlink verifies that the given path is not a symlink.
//
// The path is inspected with Lstat so the link itself is examined rather
// than its target. A path that does not exist is accepted, since it cannot
// be a symlink. Returns an error if the path is a symlink or cannot be
// inspected for any reason other than not existing.
func checkFileNotSymlink(path string) error {
	info, err := os.Lstat(path)
	switch {
	case err == nil:
		if info.Mode()&os.ModeSymlink == 0 {
			return nil
		}
		return fmt.Errorf("file is a symlink: %s", path)
	case os.IsNotExist(err):
		return nil // File doesn't exist, can't be a symlink
	default:
		return fmt.Errorf("failed to stat file: %w", err)
	}
}
|