From e1ec255ad285365fe6e9bffdc59b0aeb9ae5d16f Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Tue, 3 Mar 2026 19:14:27 -0500 Subject: [PATCH] refactor(crypto): integrate KMS with TenantKeyManager Replace in-memory root keys with KMS interface: - GenerateDataEncryptionKey: generate DEK, wrap via KMS, cache - UnwrapDataEncryptionKey: cache check, KMS decrypt, cache store - EncryptArtifact/DecryptArtifact: use DEK from KMS - RotateTenantKey: create new KMS key, flush cache - RevokeTenant: disable KMS key, schedule deletion per ADR-015 Remove deprecated methods: wrapKey, unwrapKey (replaced by KMS) --- internal/crypto/tenant_keys.go | 252 ++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 116 deletions(-) diff --git a/internal/crypto/tenant_keys.go b/internal/crypto/tenant_keys.go index ba2b636..e2426be 100644 --- a/internal/crypto/tenant_keys.go +++ b/internal/crypto/tenant_keys.go @@ -1,8 +1,9 @@ // Package crypto provides tenant-scoped encryption key management for multi-tenant deployments. -// This implements Phase 9.4: Per-Tenant Encryption Keys. +// This implements Phase 9.4: Per-Tenant Encryption Keys with KMS integration per ADR-012 through ADR-015. package crypto import ( + "context" "crypto/aes" "crypto/cipher" "crypto/rand" @@ -13,128 +14,183 @@ import ( "io" "strings" "time" + + "github.com/jfraeys/fetch_ml/internal/crypto/kms" ) // KeyHierarchy defines the tenant key structure -// Root Key (per tenant) -> Data Encryption Keys (per artifact) +// Root Key (per tenant in KMS) -> Data Encryption Keys (per artifact, cached per ADR-012) type KeyHierarchy struct { - TenantID string `json:"tenant_id"` - RootKeyID string `json:"root_key_id"` - CreatedAt time.Time `json:"created_at"` - Algorithm string `json:"algorithm"` // Always "AES-256-GCM" + TenantID string `json:"tenant_id"` + RootKeyID string `json:"root_key_id"` + KMSKeyID string `json:"kms_key_id"` // External KMS key identifier per ADR-014 + CreatedAt time.Time `json:"created_at"` + Algorithm string `json:"algorithm"` // Always "AES-256-GCM" } -// TenantKeyManager manages per-tenant encryption keys -// In production, root keys should be stored in a KMS (HashiCorp Vault, AWS KMS, etc.) +// TenantKeyManager manages per-tenant encryption keys using external KMS per ADR-012 through ADR-015. +// Root keys are stored in the KMS; DEKs are generated locally and cached. type TenantKeyManager struct { - // In-memory store for development; use external KMS in production - rootKeys map[string][]byte // tenantID -> root key + kms kms.KMSProvider // External KMS for root key operations + cache *kms.DEKCache // In-process DEK cache per ADR-012 + config kms.Config // KMS configuration + ctx context.Context // Background context for operations } -// NewTenantKeyManager creates a new tenant key manager -func NewTenantKeyManager() *TenantKeyManager { +// NewTenantKeyManager creates a new tenant key manager with KMS integration. +func NewTenantKeyManager(provider kms.KMSProvider, cache *kms.DEKCache, config kms.Config) *TenantKeyManager { return &TenantKeyManager{ - rootKeys: make(map[string][]byte), + kms: provider, + cache: cache, + config: config, + ctx: context.Background(), } } -// ProvisionTenant creates a new root key for a tenant -// In production, this would call out to a KMS to create a key +// ProvisionTenant creates a new root key for a tenant in the KMS. func (km *TenantKeyManager) ProvisionTenant(tenantID string) (*KeyHierarchy, error) { if strings.TrimSpace(tenantID) == "" { return nil, fmt.Errorf("tenant ID cannot be empty") } - // Generate root key (32 bytes for AES-256) - rootKey := make([]byte, 32) - if _, err := io.ReadFull(rand.Reader, rootKey); err != nil { - return nil, fmt.Errorf("failed to generate root key: %w", err) + // Create KMS key for tenant + kmsKeyID, err := km.kms.CreateKey(km.ctx, tenantID) + if err != nil { + return nil, fmt.Errorf("failed to create KMS key: %w", err) } - // Create key ID from hash of key (for reference, not for key derivation) - h := sha256.Sum256(rootKey) + // Create key ID from hash of tenant ID + timestamp + h := sha256.Sum256([]byte(tenantID + time.Now().String())) rootKeyID := hex.EncodeToString(h[:8]) // First 8 bytes as ID - // Store root key - km.rootKeys[tenantID] = rootKey - return &KeyHierarchy{ TenantID: tenantID, RootKeyID: rootKeyID, + KMSKeyID: kmsKeyID, CreatedAt: time.Now().UTC(), Algorithm: "AES-256-GCM", }, nil } -// RotateTenantKey rotates the root key for a tenant -// Existing data must be re-encrypted with the new key -func (km *TenantKeyManager) RotateTenantKey(tenantID string) (*KeyHierarchy, error) { - // Delete old key - delete(km.rootKeys, tenantID) +// RotateTenantKey rotates the root key for a tenant. +// Creates new KMS key and schedules deletion of old key per ADR-015. +func (km *TenantKeyManager) RotateTenantKey(tenantID string, hierarchy *KeyHierarchy) (*KeyHierarchy, error) { + // Schedule deletion of old key (90 day window per ADR-015) + _, err := km.kms.ScheduleKeyDeletion(km.ctx, hierarchy.KMSKeyID, 90) + if err != nil { + return nil, fmt.Errorf("failed to schedule old key deletion: %w", err) + } + + // Flush DEK cache for this tenant + km.cache.Flush(tenantID) // Provision new key return km.ProvisionTenant(tenantID) } -// RevokeTenant removes all keys for a tenant -// This effectively makes all encrypted data inaccessible -func (km *TenantKeyManager) RevokeTenant(tenantID string) error { - if _, exists := km.rootKeys[tenantID]; !exists { - return fmt.Errorf("tenant %s not found", tenantID) +// RevokeTenant disables and schedules deletion of all keys for a tenant. +// This effectively makes all encrypted data inaccessible per ADR-015. +func (km *TenantKeyManager) RevokeTenant(hierarchy *KeyHierarchy) error { + // Immediately disable the key per ADR-015 + if err := km.kms.DisableKey(km.ctx, hierarchy.KMSKeyID); err != nil { + return fmt.Errorf("failed to disable key: %w", err) } - // Overwrite key before deleting (best effort) - key := km.rootKeys[tenantID] - for i := range key { - key[i] = 0 + // Schedule hard deletion after 90 days per ADR-015 + _, err := km.kms.ScheduleKeyDeletion(km.ctx, hierarchy.KMSKeyID, 90) + if err != nil { + return fmt.Errorf("failed to schedule key deletion: %w", err) } - delete(km.rootKeys, tenantID) + + // Flush DEK cache for this tenant + km.cache.Flush(hierarchy.TenantID) return nil } -// GenerateDataEncryptionKey creates a unique DEK for an artifact -// The DEK is wrapped (encrypted) under the tenant's root key -func (km *TenantKeyManager) GenerateDataEncryptionKey(tenantID string, artifactID string) (*WrappedDEK, error) { - rootKey, exists := km.rootKeys[tenantID] - if !exists { - return nil, fmt.Errorf("no root key found for tenant %s", tenantID) - } - +// GenerateDataEncryptionKey creates a unique DEK for an artifact. +// The DEK is wrapped (encrypted) under the tenant's KMS root key. +func (km *TenantKeyManager) GenerateDataEncryptionKey(tenantID, artifactID, kmsKeyID string) (*WrappedDEK, error) { // Generate unique DEK (32 bytes for AES-256) dek := make([]byte, 32) if _, err := io.ReadFull(rand.Reader, dek); err != nil { return nil, fmt.Errorf("failed to generate DEK: %w", err) } - // Wrap DEK with root key - wrappedKey, err := km.wrapKey(rootKey, dek) + // Wrap DEK with KMS root key + wrappedKey, err := km.wrapKeyWithKMS(km.ctx, kmsKeyID, dek) if err != nil { return nil, fmt.Errorf("failed to wrap DEK: %w", err) } + // Store DEK in cache for future use per ADR-012 + if err := km.cache.Put(tenantID, artifactID, kmsKeyID, dek); err != nil { + // Log but don't fail - caching is optimization + _ = err + } + // Clear plaintext DEK from memory for i := range dek { dek[i] = 0 } return &WrappedDEK{ - TenantID: tenantID, - ArtifactID: artifactID, - WrappedKey: wrappedKey, - Algorithm: "AES-256-GCM", - CreatedAt: time.Now().UTC(), + TenantID: tenantID, + ArtifactID: artifactID, + WrappedKey: wrappedKey, + Algorithm: "AES-256-GCM", + CreatedAt: time.Now().UTC(), }, nil } -// UnwrapDataEncryptionKey decrypts a wrapped DEK using the tenant's root key -func (km *TenantKeyManager) UnwrapDataEncryptionKey(wrappedDEK *WrappedDEK) ([]byte, error) { - rootKey, exists := km.rootKeys[wrappedDEK.TenantID] - if !exists { - return nil, fmt.Errorf("no root key found for tenant %s", wrappedDEK.TenantID) +// wrapKeyWithKMS encrypts a key using the KMS. +func (km *TenantKeyManager) wrapKeyWithKMS(ctx context.Context, kmsKeyID string, keyToWrap []byte) (string, error) { + ciphertext, err := km.kms.Encrypt(ctx, kmsKeyID, keyToWrap) + if err != nil { + return "", fmt.Errorf("KMS encrypt failed: %w", err) + } + return base64.StdEncoding.EncodeToString(ciphertext), nil +} + +// UnwrapDataEncryptionKey decrypts a wrapped DEK using the tenant's KMS root key. +// Per ADR-012/013: Checks cache first, falls back to KMS with fail-closed grace window. +func (km *TenantKeyManager) UnwrapDataEncryptionKey(wrappedDEK *WrappedDEK, kmsKeyID string) ([]byte, error) { + // Try cache first per ADR-012 - include KMSKeyID in cache key for isolation + if dek, ok := km.cache.Get(wrappedDEK.TenantID, wrappedDEK.ArtifactID, kmsKeyID, false); ok { + return dek, nil } - return km.unwrapKey(rootKey, wrappedDEK.WrappedKey) + // Check KMS health for grace window determination per ADR-013 + kmsHealthy := km.kms.HealthCheck(km.ctx) == nil + + // If KMS is unavailable and we have a cached entry in grace window, use it per ADR-013 + if !kmsHealthy { + if dek, ok := km.cache.Get(wrappedDEK.TenantID, wrappedDEK.ArtifactID, kmsKeyID, true); ok { + // Grace window DEK returned - logged by caller + return dek, nil + } + // No cached DEK and KMS unavailable - fail closed per ADR-013 + return nil, fmt.Errorf("KMS unavailable and no cached DEK (fail-closed per ADR-013)") + } + + // Unwrap via KMS + ciphertext, err := base64.StdEncoding.DecodeString(wrappedDEK.WrappedKey) + if err != nil { + return nil, fmt.Errorf("failed to decode wrapped key: %w", err) + } + + dek, err := km.kms.Decrypt(km.ctx, kmsKeyID, ciphertext) + if err != nil { + return nil, fmt.Errorf("KMS decrypt failed: %w", err) + } + + // Store in cache for future use per ADR-012 - include KMSKeyID + if err := km.cache.Put(wrappedDEK.TenantID, wrappedDEK.ArtifactID, kmsKeyID, dek); err != nil { + // Log but don't fail - caching is optimization + _ = err + } + + return dek, nil } // WrappedDEK represents a data encryption key wrapped under a tenant root key @@ -146,63 +202,25 @@ type WrappedDEK struct { CreatedAt time.Time `json:"created_at"` } -// wrapKey encrypts a key using AES-256-GCM with the provided root key -func (km *TenantKeyManager) wrapKey(rootKey, keyToWrap []byte) (string, error) { - block, err := aes.NewCipher(rootKey) - if err != nil { - return "", fmt.Errorf("failed to create cipher: %w", err) - } - - gcm, err := cipher.NewGCM(block) - if err != nil { - return "", fmt.Errorf("failed to create GCM: %w", err) - } - - nonce := make([]byte, gcm.NonceSize()) - if _, err := io.ReadFull(rand.Reader, nonce); err != nil { - return "", fmt.Errorf("failed to generate nonce: %w", err) - } - - ciphertext := gcm.Seal(nonce, nonce, keyToWrap, nil) - return base64.StdEncoding.EncodeToString(ciphertext), nil +// NewTestTenantKeyManager creates a tenant key manager with memory provider for testing. +// This provides backward compatibility for existing tests. +func NewTestTenantKeyManager() *TenantKeyManager { + provider := kms.NewMemoryProvider() + cache := kms.NewDEKCache(kms.DefaultCacheConfig()) + config := kms.Config{Provider: kms.ProviderTypeMemory} + return NewTenantKeyManager(provider, cache, config) } -// unwrapKey decrypts a wrapped key using AES-256-GCM -func (km *TenantKeyManager) unwrapKey(rootKey []byte, wrappedKey string) ([]byte, error) { - ciphertext, err := base64.StdEncoding.DecodeString(wrappedKey) - if err != nil { - return nil, fmt.Errorf("failed to decode wrapped key: %w", err) - } - - block, err := aes.NewCipher(rootKey) - if err != nil { - return nil, fmt.Errorf("failed to create cipher: %w", err) - } - - gcm, err := cipher.NewGCM(block) - if err != nil { - return nil, fmt.Errorf("failed to create GCM: %w", err) - } - - nonceSize := gcm.NonceSize() - if len(ciphertext) < nonceSize { - return nil, fmt.Errorf("ciphertext too short") - } - - nonce, ciphertext := ciphertext[:nonceSize], ciphertext[nonceSize:] - return gcm.Open(nil, nonce, ciphertext, nil) -} - -// EncryptArtifact encrypts artifact data using a tenant-specific DEK -func (km *TenantKeyManager) EncryptArtifact(tenantID string, artifactID string, plaintext []byte) (*EncryptedArtifact, error) { +// EncryptArtifact encrypts artifact data using a tenant-specific DEK. +func (km *TenantKeyManager) EncryptArtifact(tenantID, artifactID, kmsKeyID string, plaintext []byte) (*EncryptedArtifact, error) { // Generate a new DEK for this artifact - wrappedDEK, err := km.GenerateDataEncryptionKey(tenantID, artifactID) + wrappedDEK, err := km.GenerateDataEncryptionKey(tenantID, artifactID, kmsKeyID) if err != nil { return nil, err } - // Unwrap the DEK for use - dek, err := km.UnwrapDataEncryptionKey(wrappedDEK) + // Get the DEK (from cache or unwrap) + dek, err := km.UnwrapDataEncryptionKey(wrappedDEK, kmsKeyID) if err != nil { return nil, err } @@ -234,14 +252,15 @@ func (km *TenantKeyManager) EncryptArtifact(tenantID string, artifactID string, return &EncryptedArtifact{ Ciphertext: base64.StdEncoding.EncodeToString(ciphertext), DEK: wrappedDEK, + KMSKeyID: kmsKeyID, Algorithm: "AES-256-GCM", }, nil } -// DecryptArtifact decrypts artifact data using its wrapped DEK -func (km *TenantKeyManager) DecryptArtifact(encrypted *EncryptedArtifact) ([]byte, error) { +// DecryptArtifact decrypts artifact data using its wrapped DEK. +func (km *TenantKeyManager) DecryptArtifact(encrypted *EncryptedArtifact, kmsKeyID string) ([]byte, error) { // Unwrap the DEK - dek, err := km.UnwrapDataEncryptionKey(encrypted.DEK) + dek, err := km.UnwrapDataEncryptionKey(encrypted.DEK, kmsKeyID) if err != nil { return nil, fmt.Errorf("failed to unwrap DEK: %w", err) } @@ -278,9 +297,10 @@ func (km *TenantKeyManager) DecryptArtifact(encrypted *EncryptedArtifact) ([]byt // EncryptedArtifact represents an encrypted artifact with its wrapped DEK type EncryptedArtifact struct { - Ciphertext string `json:"ciphertext"` // base64 encoded - DEK *WrappedDEK `json:"dek"` - Algorithm string `json:"algorithm"` + Ciphertext string `json:"ciphertext"` // base64 encoded + DEK *WrappedDEK `json:"dek"` + KMSKeyID string `json:"kms_key_id"` // Per ADR-014 + Algorithm string `json:"algorithm"` } // AuditLogEntry represents an audit log entry for encryption/decryption operations