Add MaxArtifactFiles and MaxArtifactTotalBytes to SandboxConfig:
- Default MaxArtifactFiles: 10,000 (configurable via SecurityDefaults)
- Default MaxArtifactTotalBytes: 100GB (configurable via SecurityDefaults)
- ApplySecurityDefaults() sets defaults if not specified

Enforce caps in scanArtifacts() during directory walk:
- Returns error immediately when MaxArtifactFiles exceeded
- Returns error immediately when MaxArtifactTotalBytes exceeded
- Prevents resource exhaustion attacks from malicious artifact trees

Update all call sites to pass SandboxConfig for cap enforcement:
- Native bridge libs updated to pass caps argument
- Benchmark tests updated with nil caps (unlimited for benchmarks)
- Unit tests updated with nil caps

Closes: artifact ingestion caps items from security plan
130 lines
3 KiB
Go
130 lines
3 KiB
Go
package benchmarks
|
|
|
|
import (
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"testing"

	"github.com/jfraeys/fetch_ml/internal/worker"
)
|
|
|
|
// BenchmarkArtifactScanGo profiles Go filepath.WalkDir implementation
|
|
func BenchmarkArtifactScanGo(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
|
|
// Create test artifact structure
|
|
createTestArtifacts(b, tmpDir, 100)
|
|
|
|
b.ReportAllocs()
|
|
|
|
for b.Loop() {
|
|
_, err := worker.ScanArtifacts(tmpDir, false, nil)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkArtifactScanNative profiles C++ platform-optimized traversal
|
|
// Uses: fts on BSD, getdents64 on Linux, getattrlistbulk on macOS
|
|
func BenchmarkArtifactScanNative(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
|
|
// Create test artifact structure
|
|
createTestArtifacts(b, tmpDir, 100)
|
|
|
|
b.ReportAllocs()
|
|
|
|
for b.Loop() {
|
|
_, err := worker.ScanArtifactsNative(tmpDir)
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "native artifact scanner requires") {
|
|
b.Skip("Native artifact scanner not available: ", err)
|
|
}
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// BenchmarkArtifactScanLarge tests with many files
|
|
func BenchmarkArtifactScanLarge(b *testing.B) {
|
|
tmpDir := b.TempDir()
|
|
|
|
// Create 1000 test files
|
|
createTestArtifacts(b, tmpDir, 1000)
|
|
|
|
b.Run("Go", func(b *testing.B) {
|
|
b.ReportAllocs()
|
|
for b.Loop() {
|
|
_, err := worker.ScanArtifacts(tmpDir, false, nil)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
})
|
|
|
|
b.Run("Native", func(b *testing.B) {
|
|
b.ReportAllocs()
|
|
for b.Loop() {
|
|
_, err := worker.ScanArtifactsNative(tmpDir)
|
|
if err != nil {
|
|
if strings.Contains(err.Error(), "native artifact scanner requires") {
|
|
b.Skip("Native artifact scanner not available: ", err)
|
|
}
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
// createTestArtifacts populates root with an artifact tree for the scan
// benchmarks.
//
// It creates five nested directories, writes count data files spread
// round-robin across them, and finally adds run_manifest.json and
// code/script.py, which are files that should be excluded from a scan.
//
// File i goes to directory i%5 and is (i%10+1) KiB large. The full index i is
// part of the file name, so every file is distinct and exactly count data
// files exist afterwards. (Deriving the name from i%10 alone made files in
// the same directory overwrite each other, leaving at most ten files no
// matter how large count was.)
//
// Any filesystem error aborts the calling test or benchmark via b.Fatal.
func createTestArtifacts(b testing.TB, root string, count int) {
	b.Helper()

	// Nested directories that hold the generated files.
	dirs := []string{
		"outputs",
		"outputs/models",
		"outputs/checkpoints",
		"logs",
		"data",
	}
	for _, dir := range dirs {
		if err := os.MkdirAll(filepath.Join(root, dir), 0750); err != nil {
			b.Fatal(err)
		}
	}

	// One naming scheme per directory; file i uses layouts[i%len(layouts)].
	layouts := []struct {
		dir    string
		prefix string
		ext    string
	}{
		{dir: "outputs", prefix: "model_", ext: ".pt"},
		{dir: filepath.Join("outputs", "models"), prefix: "checkpoint_", ext: ".ckpt"},
		{dir: filepath.Join("outputs", "checkpoints"), prefix: "epoch_", ext: ".pt"},
		{dir: "logs", prefix: "train_", ext: ".log"},
		{dir: "data", prefix: "batch_", ext: ".npy"},
	}

	for i := 0; i < count; i++ {
		layout := layouts[i%len(layouts)]
		path := filepath.Join(root, layout.dir, layout.prefix+strconv.Itoa(i)+layout.ext)

		data := make([]byte, 1024*(i%10+1)) // Varying sizes 1KB-10KB
		for j := range data {
			// Deterministic filler; the conversion to byte wraps modulo 256.
			data[j] = byte(i + j%256)
		}

		if err := os.WriteFile(path, data, 0640); err != nil {
			b.Fatal(err)
		}
	}

	// Files that should be excluded. Setup errors are fatal here as well, so
	// a broken fixture cannot silently skew a benchmark.
	if err := os.WriteFile(filepath.Join(root, "run_manifest.json"), []byte("{}"), 0640); err != nil {
		b.Fatal(err)
	}
	codeDir := filepath.Join(root, "code")
	if err := os.MkdirAll(codeDir, 0750); err != nil {
		b.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(codeDir, "script.py"), []byte("# test"), 0640); err != nil {
		b.Fatal(err)
	}
}
|