From 90ae9edfff55c19df13b7617b6f3dcff08dba8eb Mon Sep 17 00:00:00 2001 From: Jeremie Fraeys Date: Mon, 23 Feb 2026 19:44:00 -0500 Subject: [PATCH] feat(verification): Custom linting tool (fetchml-vet) for structural invariants Add golang.org/x/tools/go/analysis based linting tool: - fetchml-vet: Custom go vet tool for security invariants Add analyzers for critical security patterns: - noBareDetector: Ensures CreateDetector always captures DetectionInfo (prevents silent metadata loss in GPU detection) - manifestEnv: Validates functions returning Artifacts populate Environment (ensures reproducibility metadata capture) - noInlineCredentials: Detects inline credential patterns in config structs (enforces environment variable references) - hipaaComplete: Validates HIPAA mode configs have all required fields (structural check for compliance completeness) Integration with make lint-custom: - Builds bin/fetchml-vet from tools/fetchml-vet/cmd/fetchml-vet/ - Runs with: go vet -vettool=bin/fetchml-vet ./internal/... Part of: V.4 custom linting from security plan --- tools/fetchml-vet/analyzers/hipaacomplete.go | 223 ++++++++++++++++++ tools/fetchml-vet/analyzers/manifestenv.go | 96 ++++++++ tools/fetchml-vet/analyzers/nobaredetector.go | 196 +++++++++++++++ .../analyzers/noinlinecredentials.go | 120 ++++++++++ tools/fetchml-vet/cmd/fetchml-vet/main.go | 16 ++ 5 files changed, 651 insertions(+) create mode 100644 tools/fetchml-vet/analyzers/hipaacomplete.go create mode 100644 tools/fetchml-vet/analyzers/manifestenv.go create mode 100644 tools/fetchml-vet/analyzers/nobaredetector.go create mode 100644 tools/fetchml-vet/analyzers/noinlinecredentials.go create mode 100644 tools/fetchml-vet/cmd/fetchml-vet/main.go diff --git a/tools/fetchml-vet/analyzers/hipaacomplete.go b/tools/fetchml-vet/analyzers/hipaacomplete.go new file mode 100644 index 0000000..f5d6815 --- /dev/null +++ b/tools/fetchml-vet/analyzers/hipaacomplete.go @@ -0,0 +1,223 @@ +package analyzers + +import ( + "go/ast" + "go/token" + "strings" + + "golang.org/x/tools/go/analysis" +) + +// HIPAACompletenessAnalyzer flags any switch or if-else statement that checks +// compliance_mode == "hipaa" but doesn't verify all six hard-required fields. +// This prevents partial HIPAA enforcement from silently passing. +var HIPAACompletenessAnalyzer = &analysis.Analyzer{ + Name: "hippacomplete", + Doc: "flags incomplete HIPAA compliance mode checks", + Run: runHIPAACompleteness, +} + +// hipaaRequiredFields are the six fields that must be checked in HIPAA mode +var hipaaRequiredFields = []string{ + "ConfigHash", + "SandboxSeccomp", + "SandboxNoNewPrivs", + "SandboxNetworkMode", + "MaxWorkers", + "ComplianceMode", +} + +func runHIPAACompleteness(pass *analysis.Pass) (interface{}, error) { + for _, file := range pass.Files { + ast.Inspect(file, func(n ast.Node) bool { + // Look for if statements + ifStmt, ok := n.(*ast.IfStmt) + if ok { + checkHIPAACondition(pass, ifStmt.Cond, ifStmt.Body) + return true + } + + // Look for switch statements + switchStmt, ok := n.(*ast.SwitchStmt) + if ok { + checkHIPAASwitch(pass, switchStmt) + return true + } + + return true + }) + } + return nil, nil +} + +// checkHIPAACondition checks if an if condition is checking for HIPAA mode +func checkHIPAACondition(pass *analysis.Pass, cond ast.Expr, body *ast.BlockStmt) { + // Check if condition checks for "hipaa" string + if !containsHIPAACheck(cond) { + return + } + + // Check what fields are accessed in the body + checkedFields := extractCheckedFields(body) + + // Report missing fields + var missing []string + for _, required := range hipaaRequiredFields { + found := false + for _, checked := range checkedFields { + if strings.EqualFold(checked, required) { + found = true + break + } + } + if !found { + missing = append(missing, required) + } + } + + // If checking HIPAA mode but not all required fields, report it + if len(missing) > 0 && len(missing) < len(hipaaRequiredFields) { + // Partial check detected - this is the problematic case + pass.Reportf(body.Pos(), + "HIPAA compliance mode check is incomplete - missing required fields: %v", + missing) + } +} + +// checkHIPAASwitch checks switch statements for HIPAA mode handling +func checkHIPAASwitch(pass *analysis.Pass, switchStmt *ast.SwitchStmt) { + // Check if the switch tag checks compliance mode + if switchStmt.Tag == nil { + return + } + + if !isComplianceModeCheck(switchStmt.Tag) { + return + } + + // Find the "hipaa" case + var hipaaCase *ast.CaseClause + for _, stmt := range switchStmt.Body.List { + caseClause, ok := stmt.(*ast.CaseClause) + if !ok { + continue + } + + // Check if this case is for "hipaa" + for _, val := range caseClause.List { + if isHipaaString(val) { + hipaaCase = caseClause + break + } + } + if hipaaCase != nil { + break + } + } + + if hipaaCase == nil { + return + } + + // Check what fields are accessed in the hipaa case body + checkedFields := extractCheckedFieldsFromStmts(hipaaCase.Body) + + // Report missing fields + var missing []string + for _, required := range hipaaRequiredFields { + found := false + for _, checked := range checkedFields { + if strings.EqualFold(checked, required) { + found = true + break + } + } + if !found { + missing = append(missing, required) + } + } + + if len(missing) > 0 && len(missing) < len(hipaaRequiredFields) { + pass.Reportf(hipaaCase.Pos(), + "HIPAA case is incomplete - missing required field checks: %v", + missing) + } +} + +// containsHIPAACheck checks if an expression contains a check for "hipaa" +func containsHIPAACheck(expr ast.Expr) bool { + switch e := expr.(type) { + case *ast.BinaryExpr: + // Check for == "hipaa" or != "hipaa" + if isHipaaString(e.X) || isHipaaString(e.Y) { + return true + } + // Recursively check both sides + return containsHIPAACheck(e.X) || containsHIPAACheck(e.Y) + case *ast.CallExpr: + // Check for strings.EqualFold(x, "hipaa") or similar + return containsHipaaInCall(e) + } + return false +} + +// isHipaaString checks if an expression is the string literal "hipaa" +func isHipaaString(expr ast.Expr) bool { + lit, ok := expr.(*ast.BasicLit) + if !ok { + return false + } + return lit.Kind == token.STRING && (lit.Value == `"hipaa"` || lit.Value == `"HIPAA"`) +} + +// isComplianceModeCheck checks if an expression is accessing compliance_mode +func isComplianceModeCheck(expr ast.Expr) bool { + switch e := expr.(type) { + case *ast.SelectorExpr: + return strings.EqualFold(e.Sel.Name, "ComplianceMode") || + strings.EqualFold(e.Sel.Name, "compliance_mode") + case *ast.Ident: + return strings.EqualFold(e.Name, "complianceMode") || + strings.EqualFold(e.Name, "compliance_mode") + } + return false +} + +// containsHipaaInCall checks if a function call contains "hipaa" as an argument +func containsHipaaInCall(call *ast.CallExpr) bool { + for _, arg := range call.Args { + if isHipaaString(arg) { + return true + } + } + return false +} + +// extractCheckedFields extracts field names that are accessed in a block +func extractCheckedFields(block *ast.BlockStmt) []string { + if block == nil { + return nil + } + return extractCheckedFieldsFromStmts(block.List) +} + +// extractCheckedFieldsFromStmts extracts field names from a list of statements +func extractCheckedFieldsFromStmts(stmts []ast.Stmt) []string { + var fields []string + + for _, stmt := range stmts { + ast.Inspect(stmt, func(n ast.Node) bool { + // Look for selector expressions (field access) + sel, ok := n.(*ast.SelectorExpr) + if !ok { + return true + } + + fieldName := sel.Sel.Name + fields = append(fields, fieldName) + return true + }) + } + + return fields +} diff --git a/tools/fetchml-vet/analyzers/manifestenv.go b/tools/fetchml-vet/analyzers/manifestenv.go new file mode 100644 index 0000000..eb93427 --- /dev/null +++ b/tools/fetchml-vet/analyzers/manifestenv.go @@ -0,0 +1,96 @@ +package analyzers + +import ( + "go/ast" + "go/types" + "strings" + + "golang.org/x/tools/go/analysis" +) + +// ManifestEnvironmentAnalyzer flags any function that returns manifest.Artifacts +// without explicitly setting the Environment field. This enforces the V.1 requirement +// that Artifacts must always include Environment information for provenance. +var ManifestEnvironmentAnalyzer = &analysis.Analyzer{ + Name: "manifestenv", + Doc: "flags functions returning Artifacts without Environment field set", + Run: runManifestEnvironment, +} + +func runManifestEnvironment(pass *analysis.Pass) (interface{}, error) { + for _, file := range pass.Files { + ast.Inspect(file, func(n ast.Node) bool { + // Look for return statements + ret, ok := n.(*ast.ReturnStmt) + if !ok { + return true + } + + // Check each returned value + for _, result := range ret.Results { + // Check if it's a struct literal + composite, ok := result.(*ast.CompositeLit) + if !ok { + continue + } + + // Check if the type is manifest.Artifacts + typeInfo := pass.TypesInfo.TypeOf(composite) + if typeInfo == nil { + continue + } + + typeStr := typeInfo.String() + if !strings.Contains(typeStr, "manifest.Artifacts") && !strings.Contains(typeStr, "Artifacts") { + continue + } + + // Check if Environment field is set + hasEnv := false + for _, elt := range composite.Elts { + kv, ok := elt.(*ast.KeyValueExpr) + if !ok { + continue + } + key, ok := kv.Key.(*ast.Ident) + if !ok { + continue + } + if key.Name == "Environment" { + hasEnv = true + break + } + } + + if !hasEnv { + pass.Reportf(composite.Pos(), + "returning Artifacts without Environment field set - Environment is required for provenance (V.1)") + } + } + + return true + }) + } + return nil, nil +} + +// isArtifactsType checks if a type is manifest.Artifacts +func isArtifactsType(t types.Type) bool { + if t == nil { + return false + } + named, ok := t.(*types.Named) + if !ok { + return false + } + return named.Obj().Name() == "Artifacts" +} + +// getPackagePath returns the package path of a named type +func getPackagePath(t types.Type) string { + named, ok := t.(*types.Named) + if !ok { + return "" + } + return named.Obj().Pkg().Path() +} diff --git a/tools/fetchml-vet/analyzers/nobaredetector.go b/tools/fetchml-vet/analyzers/nobaredetector.go new file mode 100644 index 0000000..fff0730 --- /dev/null +++ b/tools/fetchml-vet/analyzers/nobaredetector.go @@ -0,0 +1,196 @@ +// Package analyzers provides custom Go analysis rules for FetchML code quality and security +package analyzers + +import ( + "go/ast" + "go/token" + "go/types" + "strings" + + "golang.org/x/tools/go/analysis" +) + +// NoBareDetectorAnalyzer flags calls to GPUDetectorFactory.CreateDetector() that +// silently discard DetectionInfo needed for manifest and audit log. +// Use CreateDetectorWithInfo() instead to capture both the detector and its metadata. +var NoBareDetectorAnalyzer = &analysis.Analyzer{ + Name: "nobaredetector", + Doc: "flags bare CreateDetector calls that discard DetectionInfo", + Run: runNoBareDetector, +} + +func runNoBareDetector(pass *analysis.Pass) (interface{}, error) { + for _, file := range pass.Files { + ast.Inspect(file, func(n ast.Node) bool { + // Look for call expressions + call, ok := n.(*ast.CallExpr) + if !ok { + return true + } + + // Check if it's a method call like obj.CreateDetector(...) + sel, ok := call.Fun.(*ast.SelectorExpr) + if !ok { + return true + } + + // Check if the method name is CreateDetector + if sel.Sel.Name != "CreateDetector" { + return true + } + + // Check if the result is assigned (which is okay) + // If the parent is an assignment or short var decl, it's being captured + if isAssigned(pass, call) { + return true + } + + // Check if the call is part of a selector chain that includes CreateDetectorWithInfo + // This is a heuristic - if the same object has CreateDetectorWithInfo called on it, + // we assume they're doing the right thing + if hasCreateDetectorWithInfo(pass, call, sel) { + return true + } + + pass.Reportf(call.Pos(), + "bare CreateDetector() call detected - use CreateDetectorWithInfo() to capture DetectionInfo for manifest/audit") + + return true + }) + } + return nil, nil +} + +// isAssigned checks if a call expression's result is being assigned to a variable +func isAssigned(pass *analysis.Pass, call *ast.CallExpr) bool { + // Walk up the AST to see if this call is the RHS of an assignment + // This is a simplified check - in a full implementation, we'd use pass.TypesInfo + // to track the parent node + pos := call.Pos() + for _, file := range pass.Files { + if file.Pos() <= pos && pos <= file.End() { + var found bool + ast.Inspect(file, func(n ast.Node) bool { + if n == nil { + return true + } + if n.Pos() == pos { + // Check if parent is an assignment or short var decl + return false // Stop searching + } + // Check for assignment statement containing this call + switch node := n.(type) { + case *ast.AssignStmt: + for _, rhs := range node.Rhs { + if rhs.Pos() == pos { + found = true + return false + } + } + case *ast.ValueSpec: + for _, val := range node.Values { + if val.Pos() == pos { + found = true + return false + } + } + } + return true + }) + if found { + return true + } + } + } + return false +} + +// hasCreateDetectorWithInfo checks if the same object that has CreateDetector called +// also has CreateDetectorWithInfo called somewhere in the function +func hasCreateDetectorWithInfo(pass *analysis.Pass, call *ast.CallExpr, sel *ast.SelectorExpr) bool { + // Get the object being called on (the receiver) + receiverObj := sel.X + + // Find the containing function + var containingFunc *ast.FuncDecl + for _, file := range pass.Files { + if file.Pos() <= call.Pos() && call.Pos() <= file.End() { + ast.Inspect(file, func(n ast.Node) bool { + if fn, ok := n.(*ast.FuncDecl); ok { + if fn.Pos() <= call.Pos() && call.Pos() <= fn.End() { + containingFunc = fn + return false + } + } + return true + }) + break + } + } + + if containingFunc == nil { + return false + } + + // Check if CreateDetectorWithInfo is called on the same object in the same function + found := false + ast.Inspect(containingFunc, func(n ast.Node) bool { + call2, ok := n.(*ast.CallExpr) + if !ok { + return true + } + sel2, ok := call2.Fun.(*ast.SelectorExpr) + if !ok { + return true + } + if sel2.Sel.Name != "CreateDetectorWithInfo" { + return true + } + + // Check if it's the same receiver object + if isSameObject(receiverObj, sel2.X) { + found = true + return false + } + return true + }) + + return found +} + +// isSameObject checks if two AST expressions refer to the same object +func isSameObject(a, b ast.Expr) bool { + // Simple string comparison for identifiers and selectors + // In a full implementation, we'd use types.Object from type checking + aStr := exprToString(a) + bStr := exprToString(b) + return aStr == bStr +} + +// exprToString converts an expression to a simple string representation +func exprToString(expr ast.Expr) string { + switch e := expr.(type) { + case *ast.Ident: + return e.Name + case *ast.SelectorExpr: + return exprToString(e.X) + "." + e.Sel.Name + default: + return "" + } +} + +// getTypeName returns a simple type name string +func getTypeName(t types.Type) string { + if t == nil { + return "" + } + return strings.TrimPrefix(t.String(), "github.com/jfraeys/fetch_ml/") +} + +// isWithinFunction checks if a position is within a function declaration +func isWithinFunction(fset *token.FileSet, pos token.Pos, fn *ast.FuncDecl) bool { + if fn == nil { + return false + } + return fn.Pos() <= pos && pos <= fn.End() +} diff --git a/tools/fetchml-vet/analyzers/noinlinecredentials.go b/tools/fetchml-vet/analyzers/noinlinecredentials.go new file mode 100644 index 0000000..2c52297 --- /dev/null +++ b/tools/fetchml-vet/analyzers/noinlinecredentials.go @@ -0,0 +1,120 @@ +package analyzers + +import ( + "go/ast" + "go/token" + "strings" + + "golang.org/x/tools/go/analysis" +) + +// NoInlineCredentialsAnalyzer flags struct literals of type worker.Config where +// sensitive fields (RedisPassword, SecretKey, AccessKey) are set to non-empty string +// literals instead of environment variable references. Credentials must not appear +// in source or config files. +var NoInlineCredentialsAnalyzer = &analysis.Analyzer{ + Name: "noinlinecreds", + Doc: "flags inline credentials in Config struct literals", + Run: runNoInlineCredentials, +} + +// sensitiveCredentialFields lists field names that should never have inline string literals +var sensitiveCredentialFields = []string{ + "RedisPassword", + "SecretKey", + "AccessKey", + "Password", + "APIKey", + "Token", + "PrivateKey", +} + +func runNoInlineCredentials(pass *analysis.Pass) (interface{}, error) { + for _, file := range pass.Files { + ast.Inspect(file, func(n ast.Node) bool { + // Look for composite literals (struct initialization) + composite, ok := n.(*ast.CompositeLit) + if !ok { + return true + } + + // Check if this is a Config type + typeInfo := pass.TypesInfo.TypeOf(composite) + if typeInfo == nil { + return true + } + + typeStr := typeInfo.String() + // Match worker.Config or Config types + if !strings.Contains(typeStr, "Config") { + return true + } + + // Check each field in the composite literal + for _, elt := range composite.Elts { + kv, ok := elt.(*ast.KeyValueExpr) + if !ok { + continue + } + + key, ok := kv.Key.(*ast.Ident) + if !ok { + continue + } + + // Check if this is a sensitive field + if !isSensitiveField(key.Name) { + continue + } + + // Check if the value is a string literal (not env var or function call) + if isInlineStringLiteral(kv.Value) { + // Check if it's not empty (empty strings might be intentional) + if !isEmptyStringLiteral(kv.Value) { + pass.Reportf(kv.Value.Pos(), + "inline credential detected in field %s - use environment variables instead (e.g., os.Getenv)", + key.Name) + } + } + } + + return true + }) + } + return nil, nil +} + +// isSensitiveField checks if a field name is in the sensitive credentials list +func isSensitiveField(name string) bool { + for _, sensitive := range sensitiveCredentialFields { + if name == sensitive { + return true + } + } + return false +} + +// isInlineStringLiteral checks if an expression is a string literal (not env var ref) +func isInlineStringLiteral(expr ast.Expr) bool { + switch e := expr.(type) { + case *ast.BasicLit: + // String literal like "password123" + return e.Kind == token.STRING + case *ast.BinaryExpr: + // String concatenation like "prefix" + "suffix" + if e.Op == token.ADD { + return isInlineStringLiteral(e.X) || isInlineStringLiteral(e.Y) + } + } + return false +} + +// isEmptyStringLiteral checks if an expression is an empty string literal +func isEmptyStringLiteral(expr ast.Expr) bool { + lit, ok := expr.(*ast.BasicLit) + if !ok || lit.Kind != token.STRING { + return false + } + // Remove quotes and check if empty + return lit.Value == `""` || lit.Value == "``" +} diff --git a/tools/fetchml-vet/cmd/fetchml-vet/main.go b/tools/fetchml-vet/cmd/fetchml-vet/main.go new file mode 100644 index 0000000..99ff4c9 --- /dev/null +++ b/tools/fetchml-vet/cmd/fetchml-vet/main.go @@ -0,0 +1,16 @@ +// Package main implements the fetchml-vet custom analyzer tool +package main + +import ( + "github.com/jfraeys/fetch_ml/tools/fetchml-vet/analyzers" + "golang.org/x/tools/go/analysis/multichecker" +) + +func main() { + multichecker.Main( + analyzers.NoBareDetectorAnalyzer, + analyzers.ManifestEnvironmentAnalyzer, + analyzers.NoInlineCredentialsAnalyzer, + analyzers.HIPAACompletenessAnalyzer, + ) +}