Add privacy protection features to prevent accidental PII leakage: - PII detection engine supporting emails, phone numbers, SSNs, credit cards - CLI privacy command for scanning files and text - Privacy middleware for API request/response filtering - Suggestion utility for privacy-preserving alternatives Integrates PII scanning into manifest validation for narrative fields.
264 lines
7.7 KiB
Zig
264 lines
7.7 KiB
Zig
const std = @import("std");
|
|
|
|
/// Compute the Levenshtein (edit) distance between `s1` and `s2`.
///
/// The distance is the minimum number of single-byte insertions, deletions
/// and substitutions needed to turn `s1` into `s2`. Comparison is byte-wise.
///
/// Only two rows of the dynamic-programming table are kept, so the scratch
/// allocation is `2 * (s2.len + 1)` entries rather than a full
/// `(s1.len + 1) * (s2.len + 1)` matrix. The scratch memory is released
/// before returning. When either input is empty no allocation is made.
///
/// Returns `error.OutOfMemory` if the scratch buffer cannot be allocated.
pub fn levenshteinDistance(allocator: std.mem.Allocator, s1: []const u8, s2: []const u8) !usize {
    // Turning an empty string into the other one takes exactly `len` insertions.
    if (s1.len == 0) return s2.len;
    if (s2.len == 0) return s1.len;

    const width = s2.len + 1;
    const rows = try allocator.alloc(usize, 2 * width);
    defer allocator.free(rows);

    // `prev` holds the table row for the previous prefix of `s1`,
    // `curr` is the row currently being filled in.
    var prev: []usize = rows[0..width];
    var curr: []usize = rows[width..];

    // Row 0: distance from the empty prefix of `s1` to each prefix of `s2`.
    for (prev, 0..) |*cell, j| cell.* = j;

    for (s1, 1..) |c1, i| {
        // Column 0: distance from the first `i` bytes of `s1` to the empty string.
        curr[0] = i;
        for (s2, 1..) |c2, j| {
            const cost: usize = if (c1 == c2) 0 else 1;
            const deletion = prev[j] + 1;
            const insertion = curr[j - 1] + 1;
            const substitution = prev[j - 1] + cost;
            curr[j] = @min(deletion, insertion, substitution);
        }
        std.mem.swap([]usize, &prev, &curr);
    }

    // After the final swap the last completed row lives in `prev`.
    return prev[s2.len];
}
|
|
|
|
/// Find the candidates closest to `input` by Levenshtein distance.
///
/// Every entry of `candidates` whose distance to `input` is at most
/// `max_distance` is kept. The survivors are ordered by ascending distance
/// (ties keep their order from `candidates`), and at most `max_suggestions`
/// of them are returned.
///
/// Ownership: the returned outer slice and each string in it are freshly
/// allocated with `allocator`; the caller must free every string and then
/// the slice itself. On error nothing is leaked.
pub fn findSuggestions(
    allocator: std.mem.Allocator,
    input: []const u8,
    candidates: []const []const u8,
    max_distance: usize,
    max_suggestions: usize,
) ![][]const u8 {
    // A candidate paired with its distance, so both stay together while sorting.
    const Scored = struct {
        text: []const u8,
        distance: usize,

        fn closerThan(_: void, lhs: @This(), rhs: @This()) bool {
            return lhs.distance < rhs.distance;
        }
    };

    var scored = std.ArrayList(Scored).empty;
    defer scored.deinit(allocator);

    for (candidates) |candidate| {
        const distance = try levenshteinDistance(allocator, input, candidate);
        if (distance <= max_distance) {
            try scored.append(allocator, .{ .text = candidate, .distance = distance });
        }
    }

    // std.mem.sort is stable, so equally distant candidates keep input order.
    std.mem.sort(Scored, scored.items, {}, Scored.closerThan);

    const count = @min(scored.items.len, max_suggestions);
    const result = try allocator.alloc([]const u8, count);
    errdefer allocator.free(result);

    // Track how many strings were duplicated so a failure part-way through
    // frees exactly those (this errdefer runs before the one above).
    var copied: usize = 0;
    errdefer {
        for (result[0..copied]) |owned| allocator.free(owned);
    }

    for (scored.items[0..count], result) |entry, *slot| {
        slot.* = try allocator.dupe(u8, entry.text);
        copied += 1;
    }

    return result;
}
|
|
|
|
/// Suggest known commands that start with `input`.
///
/// Returns `null` when `input` is exactly a known command (nothing to
/// suggest) or when no command starts with `input`. Otherwise returns up to
/// five matching command names, in the order they appear in the command table.
///
/// Lifetime: the returned slice points into thread-local static storage.
/// It must not be freed, and it is overwritten by the next call to
/// `suggestCommands` on the same thread that produces matches.
pub fn suggestCommands(input: []const u8) ?[]const []const u8 {
    const all_commands = [_][]const u8{
        "init",       "sync",      "queue",   "requeue", "status",
        "monitor",    "cancel",    "prune",   "watch",   "dataset",
        "experiment", "narrative", "outcome", "info",    "logs",
        "annotate",   "validate",  "compare", "find",    "export",
    };

    // The result must outlive this call, so it cannot live in a local array:
    // a slice of a stack array would dangle as soon as the function returns.
    const Storage = struct {
        threadlocal var matches: [5][]const u8 = undefined;
    };

    // Exact match - no suggestion needed.
    for (all_commands) |cmd| {
        if (std.mem.eql(u8, input, cmd)) return null;
    }

    // Collect prefix matches until the buffer is full.
    var match_count: usize = 0;
    for (all_commands) |cmd| {
        if (!std.mem.startsWith(u8, cmd, input)) continue;
        Storage.matches[match_count] = cmd;
        match_count += 1;
        if (match_count == Storage.matches.len) break;
    }

    if (match_count == 0) return null;
    return Storage.matches[0..match_count];
}
|
|
|
|
/// Suggest flags for `command` that start with `input`.
///
/// Flags common to every command are considered first, followed by the
/// flags specific to `command` (for `queue`, `find`, `compare` and `export`;
/// any other command only gets the common flags). Duplicates are skipped and
/// at most five flags are returned. Returns `null` when nothing matches.
///
/// The command name `"export"` is accepted directly; the spelling
/// `"export_cmd"` (the `Command` tag name) keeps working as well.
///
/// Lifetime: the returned slice points into thread-local static storage.
/// It must not be freed, and it is overwritten by the next call to
/// `suggestFlags` on the same thread that produces matches.
pub fn suggestFlags(command: []const u8, input: []const u8) ?[]const []const u8 {
    // Common flags for all commands.
    const common_flags = [_][]const u8{ "--help", "--verbose", "--quiet", "--json" };

    // Command-specific flags.
    const queue_flags = [_][]const u8{
        "--commit",           "--priority",   "--cpu",     "--memory",   "--gpu",
        "--gpu-memory",       "--hypothesis", "--context", "--intent",   "--expected-outcome",
        "--experiment-group", "--tags",       "--dry-run", "--validate", "--explain",
        "--force",
    };

    const find_flags = [_][]const u8{
        "--tag",    "--outcome", "--dataset", "--experiment-group",
        "--author", "--after",   "--before",  "--limit",
    };

    const compare_flags = [_][]const u8{
        "--json", "--all", "--fields",
    };

    const export_flags = [_][]const u8{
        "--bundle", "--anonymize", "--anonymize-level", "--base",
    };

    // The result must outlive this call, so it cannot live in a local array:
    // a slice of a stack array would dangle as soon as the function returns.
    const Storage = struct {
        threadlocal var matches: [5][]const u8 = undefined;
    };

    // `export` is a Zig keyword, so the enum tag is `export_cmd` and
    // stringToEnum alone never resolves the real command name "export".
    const parsed: Command = if (std.mem.eql(u8, command, "export"))
        .export_cmd
    else
        std.meta.stringToEnum(Command, command) orelse .unknown;

    const specific_flags: []const []const u8 = switch (parsed) {
        .queue => &queue_flags,
        .find => &find_flags,
        .compare => &compare_flags,
        .export_cmd => &export_flags,
        else => &common_flags,
    };

    // Common flags take priority, then the command-specific ones.
    const groups = [_][]const []const u8{ &common_flags, specific_flags };

    var match_count: usize = 0;
    collect: for (groups) |group| {
        for (group) |flag| {
            if (!std.mem.startsWith(u8, flag, input)) continue;

            // Avoid duplicates (e.g. "--json" is both common and compare-specific).
            const already_added = for (Storage.matches[0..match_count]) |existing| {
                if (std.mem.eql(u8, existing, flag)) break true;
            } else false;
            if (already_added) continue;

            Storage.matches[match_count] = flag;
            match_count += 1;
            if (match_count == Storage.matches.len) break :collect;
        }
    }

    if (match_count == 0) return null;
    return Storage.matches[0..match_count];
}
|
|
|
|
/// Commands that `suggestFlags` resolves a command name to via
/// `std.meta.stringToEnum`.
///
/// Apart from the last two tags, each tag is spelled exactly like the
/// command name it stands for.
const Command = enum {
    init,
    sync,
    queue,
    requeue,
    status,
    monitor,
    cancel,
    prune,
    watch,
    dataset,
    experiment,
    narrative,
    outcome,
    info,
    logs,
    annotate,
    validate,
    compare,
    find,
    /// Presumably the `export` command; `export` is a Zig keyword, so the
    /// tag name differs and the string "export" does not map to it through
    /// `std.meta.stringToEnum`.
    export_cmd,
    /// Fallback used when a command name matches no tag.
    unknown,
};
|
|
|
|
/// Build a one-line "did you mean" hint for `input` from `suggestions`.
///
/// Each suggestion is wrapped in single quotes; entries are separated by
/// ", " except the last pair, which is joined with " or ". The line ends
/// with "?" and a newline. With no suggestions an empty string is returned.
///
/// Ownership: the returned buffer is allocated with `allocator` (including
/// the empty-string case) and must be freed by the caller.
pub fn formatSuggestionMessage(
    allocator: std.mem.Allocator,
    input: []const u8,
    suggestions: []const []const u8,
) ![]u8 {
    if (suggestions.len == 0) return allocator.dupe(u8, "");

    var message = std.ArrayList(u8).empty;
    defer message.deinit(allocator);

    const out = message.writer(allocator);
    try out.print("Did you mean for '{s}': ", .{input});

    const last = suggestions.len - 1;
    for (suggestions, 0..) |candidate, idx| {
        // Nothing before the first entry, " or " before the final one,
        // and a comma everywhere in between.
        const separator: []const u8 = if (idx == 0)
            ""
        else if (idx == last)
            " or "
        else
            ", ";
        try out.writeAll(separator);
        try out.print("'{s}'", .{candidate});
    }

    try out.writeAll("?\n");

    return message.toOwnedSlice(allocator);
}
|
|
|
|
/// Exercise `levenshteinDistance` and `findSuggestions` with known inputs.
///
/// Uses `std.testing.allocator`, so it is only meant to be called from a
/// test build. Failures are reported as test errors via `std.testing.expect*`
/// rather than `std.debug.assert`, so they are still checked in release
/// modes and come with a diagnostic. Prints a confirmation line to stderr
/// when every check passes.
pub fn testSuggestions() !void {
    const allocator = std.testing.allocator;

    // Levenshtein distance: one deletion, then one substitution.
    try std.testing.expectEqual(@as(usize, 1), try levenshteinDistance(allocator, "queue", "quee"));
    try std.testing.expectEqual(@as(usize, 1), try levenshteinDistance(allocator, "status", "statis"));

    // Suggestions: the closest candidate must come first.
    const candidates = [_][]const u8{ "queue", "query", "quiet", "quit" };
    const suggestions = try findSuggestions(allocator, "quee", &candidates, 2, 3);
    defer {
        for (suggestions) |s| allocator.free(s);
        allocator.free(suggestions);
    }
    try std.testing.expect(suggestions.len > 0);
    try std.testing.expectEqualStrings("queue", suggestions[0]);

    std.debug.print("Suggestion tests passed!\n", .{});
}

test "suggestion helpers" {
    try testSuggestions();
}
|