fetch_ml/internal/privacy/pii.go
Jeremie Fraeys aaeef69bab
feat: Privacy and PII detection
Add privacy protection features to prevent accidental PII leakage:
- PII detection engine supporting emails, phone numbers, SSNs, credit cards
- CLI privacy command for scanning files and text
- Privacy middleware for API request/response filtering
- Suggestion utility for privacy-preserving alternatives

Integrates PII scanning into manifest validation for narrative fields.
2026-02-18 21:27:23 -05:00

55 lines
1.5 KiB
Go

// Package privacy provides PII detection for narratives and annotations.
package privacy
import (
	"regexp"
	"sort"
)
// piiPatterns maps a PII category name to the compiled regular expression
// used to find it. The patterns are simple heuristics:
//   - "email": local part, "@", domain, and an alphabetic TLD of 2+ letters.
//   - "ssn": digits grouped 3-2-4 and separated by hyphens.
//   - "phone": digits grouped 3-3-4 and separated by hyphens.
//   - "credit_card": 13 to 16 digits, each optionally followed by spaces or
//     hyphens; no checksum validation is performed.
//   - "ip_address": four dot-separated groups of 1 to 3 digits; octets are
//     not range-checked, so groups above 255 also match.
var piiPatterns = map[string]*regexp.Regexp{
	// The TLD class is [A-Za-z]. Inside a character class "|" is a literal
	// pipe, not alternation, so including it would let a match run across
	// "|" separators (e.g. "user@example.com|other").
	"email":       regexp.MustCompile(`\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b`),
	"ssn":         regexp.MustCompile(`\b\d{3}-\d{2}-\d{4}\b`),
	"phone":       regexp.MustCompile(`\b\d{3}-\d{3}-\d{4}\b`),
	"credit_card": regexp.MustCompile(`\b(?:\d[ -]*?){13,16}\b`),
	"ip_address":  regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`),
}
// PIIFinding describes a single match reported by DetectPII.
type PIIFinding struct {
	// Type is the key of the pattern in piiPatterns that produced the match.
	Type string `json:"type"`
	// Position is the byte offset in the scanned text where the match starts.
	Position int `json:"position"`
	// Length is the size of the match in bytes.
	Length int `json:"length"`
	// Sample is the redacted form of the matched text, never the raw value.
	Sample string `json:"sample"`
}
// DetectPII scans text for potential PII using every pattern in piiPatterns.
//
// Each match yields one PIIFinding whose Position and Length are byte offsets
// into text and whose Sample is the redacted form of the matched substring.
// Findings are ordered by Position and then by Type, so repeated calls on the
// same text return the same sequence. The result is nil when nothing matches.
func DetectPII(text string) []PIIFinding {
	var findings []PIIFinding
	for piiType, pattern := range piiPatterns {
		for _, match := range pattern.FindAllStringIndex(text, -1) {
			start, end := match[0], match[1]
			findings = append(findings, PIIFinding{
				Type:     piiType,
				Position: start,
				Length:   end - start,
				Sample:   RedactSample(text[start:end]),
			})
		}
	}

	// Map iteration order is unspecified, so without sorting the order of the
	// findings would differ between runs. A single pattern never reports two
	// matches at the same offset, so (Position, Type) is a total order.
	sort.Slice(findings, func(i, j int) bool {
		if findings[i].Position != findings[j].Position {
			return findings[i].Position < findings[j].Position
		}
		return findings[i].Type < findings[j].Type
	})
	return findings
}
// HasPII reports whether DetectPII finds at least one match in text.
func HasPII(text string) bool {
	findings := DetectPII(text)
	return len(findings) != 0
}
// RedactSample returns a shortened form of match that is safe to include in
// reports.
//
// Matches of four characters or fewer are replaced entirely by "[PII]".
// Longer matches keep only their first two and last two characters, joined by
// "...". Lengths and cut points are measured in runes rather than bytes, so a
// multi-byte UTF-8 character is never split in half.
func RedactSample(match string) string {
	runes := []rune(match)
	if len(runes) <= 4 {
		return "[PII]"
	}
	head := string(runes[:2])
	tail := string(runes[len(runes)-2:])
	return head + "..." + tail
}