// Package privacy provides PII detection for narratives and annotations. package privacy import ( "regexp" ) // piiPatterns contains regex patterns for detecting PII. var piiPatterns = map[string]*regexp.Regexp{ "email": regexp.MustCompile(`\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b`), "ssn": regexp.MustCompile(`\b\d{3}-\d{2}-\d{4}\b`), "phone": regexp.MustCompile(`\b\d{3}-\d{3}-\d{4}\b`), "credit_card": regexp.MustCompile(`\b(?:\d[ -]*?){13,16}\b`), "ip_address": regexp.MustCompile(`\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b`), } // PIIFinding represents a detected PII instance. type PIIFinding struct { Type string `json:"type"` Sample string `json:"sample"` Position int `json:"position"` Length int `json:"length"` } // DetectPII scans text for potential PII. func DetectPII(text string) []PIIFinding { var findings []PIIFinding for piiType, pattern := range piiPatterns { matches := pattern.FindAllStringIndex(text, -1) for _, match := range matches { findings = append(findings, PIIFinding{ Type: piiType, Position: match[0], Length: match[1] - match[0], Sample: RedactSample(text[match[0]:match[1]]), }) } } return findings } // HasPII returns true if text contains PII. func HasPII(text string) bool { return len(DetectPII(text)) > 0 } // RedactSample creates a safe sample for reporting. func RedactSample(match string) string { if len(match) <= 4 { return "[PII]" } return match[:2] + "..." + match[len(match)-2:] }