// fetch_ml/tests/unit/privacy/pii_test.go

package privacy_test

import (
	"testing"

	"github.com/jfraeys/fetch_ml/internal/privacy"
)

// TestDetectPII runs privacy.DetectPII over a table of sample texts and checks
// the Type field of the returned findings.
//
// For a case with no expected types, any finding is a failure. Otherwise every
// expected type must appear among the findings; additional finding types are
// tolerated and do not fail the case.
func TestDetectPII(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  []string // PII types that must be reported for input
	}{
		{name: "email detection", input: "Contact me at user@example.com for details", want: []string{"email"}},
		{name: "SSN detection", input: "My SSN is 123-45-6789", want: []string{"ssn"}},
		{name: "phone detection", input: "Call me at 555-123-4567", want: []string{"phone"}},
		{name: "IP address detection", input: "Server at 192.168.1.1", want: []string{"ip_address"}},
		{name: "multiple PII types", input: "Email: test@example.com, SSN: 123-45-6789", want: []string{"email", "ssn"}},
		{name: "no PII", input: "This is just a normal hypothesis about learning rates", want: nil},
		{name: "credit card detection", input: "Card: 4111-1111-1111-1111", want: []string{"credit_card"}},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := privacy.DetectPII(tc.input)

			// Negative case: the text must produce no findings at all.
			if len(tc.want) == 0 {
				if n := len(got); n != 0 {
					t.Errorf("expected no PII, found %d findings", n)
				}
				return
			}

			// Collect the distinct types reported so each expected type can
			// be looked up regardless of finding order or duplicates.
			seen := make(map[string]struct{}, len(got))
			for _, finding := range got {
				seen[finding.Type] = struct{}{}
			}

			for _, piiType := range tc.want {
				if _, ok := seen[piiType]; !ok {
					t.Errorf("expected to find %s, but didn't", piiType)
				}
			}
		})
	}
}

// TestHasPII checks the boolean result of privacy.HasPII for one sample text
// that contains an email address (expects true) and one that contains no
// personal data (expects false).
func TestHasPII(t *testing.T) {
	cases := []struct {
		name  string
		input string
		want  bool
	}{
		{name: "has PII", input: "Contact: user@example.com", want: true},
		{name: "no PII", input: "Learning rate 0.01 worked well", want: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := privacy.HasPII(tc.input); got != tc.want {
				t.Errorf("HasPII() = %v, want %v", got, tc.want)
			}
		})
	}
}

// TestRedactSample checks the redacted form privacy.RedactSample produces for
// a matched string. The table expects the short match "abc" to be replaced
// entirely by "[PII]", and the longer match "user@example.com" to become
// "us...om".
func TestRedactSample(t *testing.T) {
	tests := []struct {
		name  string
		match string
		want  string
	}{
		{
			name:  "short match",
			match: "abc",
			want:  "[PII]",
		},
		{
			name:  "long match",
			match: "user@example.com",
			want:  "us...om",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := privacy.RedactSample(tt.match)
			if got != tt.want {
				// Name the exported function actually under test and include
				// the input; %q makes empty or whitespace-only values visible.
				t.Errorf("RedactSample(%q) = %q, want %q", tt.match, got, tt.want)
			}
		})
	}
}