fetch_ml/internal/crypto/kms/cache.go
Jeremie Fraeys cb25677695
feat(kms): implement core KMS infrastructure with DEK cache
Add KMSProvider interface for external key management systems:
- Encrypt/Decrypt operations for DEK wrapping
- Key lifecycle management (Create, Disable, ScheduleDeletion, Enable)
- HealthCheck and Close methods

Implement MemoryProvider for development/testing:
- XOR encryption with HMAC-SHA256 authentication
- Secure random key generation using crypto/rand
- MAC verification to detect wrong keys

Implement DEKCache per ADR-012:
- 15-minute TTL with configurable grace window (1 hour)
- LRU eviction with 1000 entry limit
- Cache key includes (tenantID, artifactID, kmsKeyID) for isolation
- Thread-safe operations with RWMutex
- Secure memory wiping on eviction/cleanup

Add config package with types:
- ProviderType enum (vault, aws, memory)
- VaultConfig with AppRole/Kubernetes/Token auth
- AWSConfig with region and alias prefix
- CacheConfig with TTL, MaxEntries, GraceWindow
- Validation methods for all config types
2026-03-03 19:13:55 -05:00

277 lines
6.5 KiB
Go

package kms
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"sync"
"time"
)
// DEKCache is an in-process cache of unwrapped DEKs (see ADR-012).
//
// Entries are held only in the map below. The entry lifetime, capacity and
// grace window are whatever NewDEKCache receives in its CacheConfig; the
// design notes for this type cite a 15-minute TTL and a 1000-entry LRU limit.
type DEKCache struct {
	// mu guards entries and evictionList.
	mu sync.RWMutex

	// entries maps the hashed cache key to the cached DEK record.
	entries map[string]*cacheEntry

	// ttl is how long an entry is served after it was created.
	ttl time.Duration

	// maxEntries is the size at which Put starts evicting.
	maxEntries int

	// graceWindow is the extra time past ttl during which an entry may
	// still be served while the KMS is unavailable (ADR-013).
	graceWindow time.Duration

	// evictionList orders cache keys for LRU eviction:
	// index 0 is the oldest, the last element is the most recently used.
	evictionList []string
}
// cacheEntry is one cached DEK together with the identifiers it was stored
// under and the bookkeeping the cache needs for expiry and eviction.
type cacheEntry struct {
	// dek is the cache's private copy of the key bytes; it is zeroed when
	// the entry is evicted.
	dek []byte

	// tenantID, artifactID and kmsKeyID are the raw identifiers passed to Put.
	tenantID string

	artifactID string

	kmsKeyID string

	// createdAt is when the entry was stored; expiry is measured from it.
	createdAt time.Time

	// lastAccess is refreshed on every successful Get.
	lastAccess time.Time

	// evicted is set once the entry has been removed from the cache.
	evicted bool
}
// NewDEKCache builds an empty DEK cache whose TTL, capacity and grace window
// are copied from cfg. The configuration values are used as given; no
// validation is performed here.
func NewDEKCache(cfg CacheConfig) *DEKCache {
	cache := new(DEKCache)
	cache.ttl = cfg.TTL
	cache.maxEntries = cfg.MaxEntries
	cache.graceWindow = cfg.GraceWindow
	cache.entries = make(map[string]*cacheEntry)
	cache.evictionList = make([]string, 0)
	return cache
}
// cacheKey derives the map key for a (tenantID, artifactID, kmsKeyID) triple.
//
// The key is the hex-encoded SHA-256 of the three identifiers, so raw IDs are
// not stored as map keys. Each identifier is written with a decimal length
// prefix ("<len>:<value>") so that the encoding is unambiguous: without it,
// identifiers containing the separator could collide, e.g. ("a/b", "c") and
// ("a", "b/c") would share one cache slot and break tenant/artifact isolation.
func cacheKey(tenantID, artifactID, kmsKeyID string) string {
	h := sha256.New()
	for _, part := range []string{tenantID, artifactID, kmsKeyID} {
		// hash.Hash.Write is documented to never return an error.
		_, _ = fmt.Fprintf(h, "%d:%s", len(part), part)
	}
	return hex.EncodeToString(h.Sum(nil))
}
// Get retrieves a DEK from the cache if present and not expired.
//
// It returns (nil, false) when the entry is missing, has been evicted, or is
// older than the TTL. Per ADR-013, when kmsUnavailable is true an entry that
// is past its TTL is still returned as long as it is no more than graceWindow
// beyond the TTL; the caller is expected to log that the grace period was used.
//
// On a hit the entry's last-access time is refreshed, the key is moved to the
// most-recently-used position, and a copy of the DEK is returned so callers
// cannot modify the cached bytes.
func (c *DEKCache) Get(tenantID, artifactID, kmsKeyID string, kmsUnavailable bool) ([]byte, bool) {
	key := cacheKey(tenantID, artifactID, kmsKeyID)

	// Hold one exclusive lock for the whole lookup. Every hit mutates the
	// entry and the LRU list anyway, and reading the entry after releasing a
	// read lock would race with eviction: the DEK could be wiped mid-copy
	// (returning zeroed key material as a "hit") or an already-evicted key
	// could be pushed back onto the LRU list.
	c.mu.Lock()
	defer c.mu.Unlock()

	entry, exists := c.entries[key]
	if !exists || entry.evicted {
		return nil, false
	}

	now := time.Now()
	age := now.Sub(entry.createdAt)
	if age > c.ttl {
		// Expired. The grace window only applies during KMS unavailability,
		// and only for graceWindow past the TTL.
		if !kmsUnavailable || age-c.ttl > c.graceWindow {
			return nil, false
		}
	}

	entry.lastAccess = now
	c.updateLRU(key)

	// Return a copy of the DEK to prevent external modification.
	dekCopy := make([]byte, len(entry.dek))
	copy(dekCopy, entry.dek)
	return dekCopy, true
}
// Put stores a copy of dek in the cache under (tenantID, artifactID, kmsKeyID).
//
// It returns an error if dek is empty. If an entry already exists for the same
// key it is replaced in place: the old DEK bytes are zeroed, and the key is
// moved to the most-recently-used position instead of being added to the LRU
// list a second time. For a new key, least-recently-used entries are evicted
// first while the cache is at or above maxEntries.
func (c *DEKCache) Put(tenantID, artifactID, kmsKeyID string, dek []byte) error {
	if len(dek) == 0 {
		return fmt.Errorf("cannot cache empty DEK")
	}
	key := cacheKey(tenantID, artifactID, kmsKeyID)

	// Copy the DEK so later changes to the caller's slice do not affect the cache.
	dekCopy := make([]byte, len(dek))
	copy(dekCopy, dek)

	c.mu.Lock()
	defer c.mu.Unlock()

	now := time.Now()
	fresh := &cacheEntry{
		dek:        dekCopy,
		tenantID:   tenantID,
		artifactID: artifactID,
		kmsKeyID:   kmsKeyID,
		createdAt:  now,
		lastAccess: now,
	}

	if old, exists := c.entries[key]; exists {
		// Replacing an entry: wipe the superseded key material and reuse the
		// existing LRU slot. No eviction is needed because the size is unchanged.
		for i := range old.dek {
			old.dek[i] = 0
		}
		old.evicted = true
		c.entries[key] = fresh
		c.updateLRU(key)
		return nil
	}

	// Make room for the new key. Loop rather than evict once so that a stale
	// key at the front of the LRU list cannot leave the cache over capacity;
	// each evictLRU call consumes at least one list element, so this terminates.
	for len(c.entries) >= c.maxEntries && len(c.evictionList) > 0 {
		c.evictLRU()
	}

	c.entries[key] = fresh
	// Newest keys live at the back of the LRU list.
	c.evictionList = append(c.evictionList, key)
	return nil
}
// Flush removes all DEKs for a specific tenant from the cache.
// It is intended for key rotation events and tenant offboarding per ADR-012.
//
// The DEK bytes of every removed entry are zeroed before the entry is dropped,
// matching what Clear, evictLRU and cleanupExpired do, so flushed key material
// does not linger in memory until garbage collection.
func (c *DEKCache) Flush(tenantID string) {
	c.mu.Lock()
	defer c.mu.Unlock()

	for key, entry := range c.entries {
		if entry.tenantID != tenantID {
			continue
		}
		// Securely wipe, then mark and drop the entry.
		for i := range entry.dek {
			entry.dek[i] = 0
		}
		entry.evicted = true
		delete(c.entries, key) // deleting during range is safe in Go
	}

	// Rebuild the LRU list with only the keys that are still cached,
	// preserving their relative order.
	kept := make([]string, 0, len(c.entries))
	for _, key := range c.evictionList {
		if _, exists := c.entries[key]; exists {
			kept = append(kept, key)
		}
	}
	c.evictionList = kept
}
// Clear empties the cache. Every cached DEK is overwritten with zeros before
// the map and the LRU list are replaced with fresh, empty ones.
func (c *DEKCache) Clear() {
	c.mu.Lock()
	defer c.mu.Unlock()

	for id := range c.entries {
		secret := c.entries[id].dek
		// Zero the key material in place before the reference is dropped.
		for pos := 0; pos < len(secret); pos++ {
			secret[pos] = 0
		}
	}

	c.evictionList = make([]string, 0)
	c.entries = make(map[string]*cacheEntry)
}
// Stats reports a snapshot of the cache: the current number of entries plus
// the configured capacity, TTL and grace window. It takes the read lock only.
func (c *DEKCache) Stats() CacheStats {
	c.mu.RLock()
	defer c.mu.RUnlock()

	var snapshot CacheStats
	snapshot.Size = len(c.entries)
	snapshot.MaxSize = c.maxEntries
	snapshot.TTL = c.ttl
	snapshot.GraceWindow = c.graceWindow
	return snapshot
}
// CacheStats is the snapshot returned by DEKCache.Stats.
type CacheStats struct {
	// Size is the number of entries currently cached.
	Size int

	// MaxSize is the configured entry limit.
	MaxSize int

	// TTL is the configured entry lifetime.
	TTL time.Duration

	// GraceWindow is the configured grace period past the TTL.
	GraceWindow time.Duration
}
// updateLRU moves key to the back of the eviction list (most recently used).
// The caller must hold c.mu for writing.
//
// Every existing occurrence of key is removed first, so the list never ends up
// with duplicates. The key is re-appended only if it is still present in
// c.entries: pushing an already-evicted key back onto the list would leave a
// stale element that a later eviction pops without freeing a slot.
func (c *DEKCache) updateLRU(key string) {
	// Filter in place; the write index never overtakes the read index.
	kept := c.evictionList[:0]
	for _, k := range c.evictionList {
		if k != key {
			kept = append(kept, k)
		}
	}
	if _, tracked := c.entries[key]; tracked {
		kept = append(kept, key)
	}
	c.evictionList = kept
}
// evictLRU removes the least recently used entry (front of the eviction list).
// The caller must hold c.mu for writing.
//
// Keys at the front of the list that no longer have a map entry are skipped,
// so a call evicts one live entry whenever the list still references one.
// Without the skip, a stale key would be popped, nothing would be freed, and
// the cache could grow past maxEntries. The evicted DEK is zeroed and the
// entry is marked evicted before it is deleted from the map.
func (c *DEKCache) evictLRU() {
	for len(c.evictionList) > 0 {
		// Pop the oldest key.
		oldestKey := c.evictionList[0]
		c.evictionList = c.evictionList[1:]

		entry, exists := c.entries[oldestKey]
		if !exists {
			continue // stale list element; try the next-oldest key
		}

		// Securely wipe DEK bytes before dropping the entry.
		for i := range entry.dek {
			entry.dek[i] = 0
		}
		entry.evicted = true
		delete(c.entries, oldestKey)
		return
	}
}
// cleanupExpired drops every entry whose age exceeds ttl + graceWindow,
// zeroing its DEK first, and then prunes the eviction list down to the keys
// that are still cached. It performs a single pass; StartCleanup is what calls
// it on a schedule.
func (c *DEKCache) cleanupExpired() {
	c.mu.Lock()
	defer c.mu.Unlock()

	now := time.Now()
	maxAge := c.ttl + c.graceWindow
	for id, e := range c.entries {
		if now.Sub(e.createdAt) <= maxAge {
			continue // still within TTL or grace window
		}
		// Wipe the key material before releasing the entry.
		for pos := 0; pos < len(e.dek); pos++ {
			e.dek[pos] = 0
		}
		e.evicted = true
		delete(c.entries, id)
	}

	// Keep only list elements that still have a map entry, in order.
	survivors := make([]string, 0, len(c.entries))
	for _, id := range c.evictionList {
		if _, alive := c.entries[id]; !alive {
			continue
		}
		survivors = append(survivors, id)
	}
	c.evictionList = survivors
}
// StartCleanup starts a background goroutine that calls cleanupExpired every
// interval. It returns a channel; closing it (or sending on it) stops the
// goroutine and its ticker.
//
// A non-positive interval is replaced by a one-minute default. time.NewTicker
// panics on a non-positive duration, and that panic would occur inside the
// background goroutine where the caller cannot recover it, taking down the
// whole process.
func (c *DEKCache) StartCleanup(interval time.Duration) chan struct{} {
	const fallbackInterval = time.Minute
	if interval <= 0 {
		interval = fallbackInterval
	}

	stop := make(chan struct{})
	go func() {
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				c.cleanupExpired()
			case <-stop:
				return
			}
		}
	}()
	return stop
}