fetch_ml/internal/privacy/pii.go
Jeremie Fraeys 4cdb68907e
refactor(utilities): update supporting modules for scheduler integration
Update utility modules:
- File utilities with secure file operations
- Environment pool with resource tracking
- Error types with scheduler error categories
- Logging with audit context support
- Network/SSH with connection pooling
- Privacy/PII handling with tenant boundaries
- Resource manager with scheduler allocation
- Security monitor with audit integration
- Tracking plugins (MLflow, TensorBoard) with auth
- Crypto signing with tenant keys
- Database init with multi-user support
2026-02-26 12:07:15 -05:00

55 lines
1.5 KiB
Go

// Package privacy provides PII detection for narratives and annotations.
package privacy
import (
	"regexp"
	"sort"
)
// piiPatterns maps a PII category name to the compiled regular expression
// used to detect it. The map keys are reported verbatim as PIIFinding.Type.
//
// The patterns are shape-only heuristics: for example, ip_address does not
// range-check its octets and credit_card does not verify any checksum, so
// false positives are possible. All patterns are compiled once at package
// initialization; regexp.MustCompile panics if a pattern is invalid.
var piiPatterns = map[string]*regexp.Regexp{
	// The top-level-domain class is [A-Za-z]. Inside a character class '|' is
	// a literal pipe, not alternation, so it must not appear there or strings
	// such as "user@example.c|m" would be accepted as e-mail addresses.
	"email": regexp.MustCompile(`\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b`),
	// Digits laid out as ddd-dd-dddd (the US Social Security number layout).
	"ssn": regexp.MustCompile(`\b\d{3}-\d{2}-\d{4}\b`),
	// Digits laid out as ddd-ddd-dddd.
	"phone": regexp.MustCompile(`\b\d{3}-\d{3}-\d{4}\b`),
	// 13 to 16 digits, each optionally followed by spaces or hyphens.
	"credit_card": regexp.MustCompile(`\b(?:\d[ -]*?){13,16}\b`),
	// Four dot-separated groups of 1 to 3 digits.
	"ip_address": regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`),
}
// PIIFinding describes a single match of one PII pattern within scanned text.
// It is serialized to JSON using the lower-case field names in the tags.
type PIIFinding struct {
// Type is the PII category name; DetectPII sets it to the piiPatterns key
// whose pattern produced the match.
Type string `json:"type"`
// Sample is a redacted rendering of the matched text; DetectPII fills it
// with the result of RedactSample, never with the raw match.
Sample string `json:"sample"`
// Position is the start offset of the match in the scanned text, taken from
// regexp's FindAllStringIndex (a byte index into the string).
Position int `json:"position"`
// Length is the match's end offset minus its start offset.
Length int `json:"length"`
}
// DetectPII scans text with every pattern in piiPatterns and returns one
// PIIFinding per match.
//
// Findings are ordered by Position, with ties broken by Type, so the result is
// deterministic even though piiPatterns is a map and Go randomizes map
// iteration order. Matches produced by different patterns may overlap; each
// one is reported separately. The Sample of every finding is passed through
// RedactSample, so the raw matched text is never returned. A nil slice is
// returned when nothing matches.
func DetectPII(text string) []PIIFinding {
	var findings []PIIFinding
	for piiType, pattern := range piiPatterns {
		for _, loc := range pattern.FindAllStringIndex(text, -1) {
			start, end := loc[0], loc[1]
			findings = append(findings, PIIFinding{
				Type:     piiType,
				Position: start,
				Length:   end - start,
				Sample:   RedactSample(text[start:end]),
			})
		}
	}

	// A single pattern never yields two matches at the same offset, so
	// (Position, Type) is a total order and the sorted output is stable
	// from call to call.
	sort.Slice(findings, func(i, j int) bool {
		if findings[i].Position != findings[j].Position {
			return findings[i].Position < findings[j].Position
		}
		return findings[i].Type < findings[j].Type
	})
	return findings
}
// HasPII returns true if text contains PII.
func HasPII(text string) bool {
return len(DetectPII(text)) > 0
}
// RedactSample returns a masked form of match that is safe to include in
// reports.
//
// Inputs of four characters or fewer are replaced entirely by "[PII]", since
// showing two leading and two trailing characters would reveal all of them.
// Longer inputs keep only their first two and last two characters, joined by
// "...". Lengths and cut points are counted in runes rather than bytes so a
// multi-byte UTF-8 character is never split into invalid output; for ASCII
// input the result is the same as byte-based slicing.
func RedactSample(match string) string {
	runes := []rune(match)
	if len(runes) <= 4 {
		return "[PII]"
	}
	return string(runes[:2]) + "..." + string(runes[len(runes)-2:])
}