Add MaxArtifactFiles and MaxArtifactTotalBytes to SandboxConfig:
- Default MaxArtifactFiles: 10,000 (configurable via SecurityDefaults)
- Default MaxArtifactTotalBytes: 100GB (configurable via SecurityDefaults)
- ApplySecurityDefaults() sets defaults if not specified

Enforce caps in scanArtifacts() during directory walk:
- Returns error immediately when MaxArtifactFiles is exceeded
- Returns error immediately when MaxArtifactTotalBytes is exceeded
- Prevents resource exhaustion attacks from malicious artifact trees

Update all call sites to pass SandboxConfig for cap enforcement:
- Native bridge libs updated to pass caps argument
- Benchmark tests updated with nil caps (unlimited for benchmarks)
- Unit tests updated with nil caps

Closes: artifact ingestion caps items from security plan
139 lines
3 KiB
Go
139 lines
3 KiB
Go
package benchmarks
|
|
|
|
import (
|
|
"archive/tar"
|
|
"bytes"
|
|
"compress/gzip"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/worker"
|
|
)
|
|
|
|
// BenchmarkExtractTarGz profiles the tar.gz extraction hot path.
|
|
// Called during snapshot resolution - streaming I/O with decompression.
|
|
// Tier 1A C++ candidate: parallel decompression, zero-copy extraction.
|
|
func BenchmarkExtractTarGz(b *testing.B) {
|
|
// Create a test tar.gz archive
|
|
tmpDir := b.TempDir()
|
|
archivePath := filepath.Join(tmpDir, "snapshot.tar.gz")
|
|
|
|
// Build archive with realistic contents
|
|
if err := createTestArchive(archivePath); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
extractDir := filepath.Join(tmpDir, "extracted")
|
|
if err := os.MkdirAll(extractDir, 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
// Clean extract dir between iterations
|
|
os.RemoveAll(extractDir)
|
|
os.MkdirAll(extractDir, 0750)
|
|
|
|
err := worker.ExtractTarGz(archivePath, extractDir)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
|
|
// createTestArchive writes a gzip-compressed tar archive to path.
//
// The archive contains five entries, each written with mode 0640, whose sizes
// range from 100 bytes to 10MB. Every entry's payload is the byte sequence
// 0, 1, ..., 255 repeated and cut off at the entry's size. The archive is
// assembled entirely in memory and handed to os.WriteFile (permission 0640)
// only after the tar and gzip writers have both closed without error.
//
// Any error from the tar writer, the gzip writer, or the final file write is
// returned unchanged.
func createTestArchive(path string) error {
	type entry struct {
		name   string
		length int
	}

	// Files of varying sizes
	entries := []entry{
		{name: "train.py", length: 5000},
		{name: "requirements.txt", length: 100},
		{name: "data/config.json", length: 500},
		{name: "checkpoints/model.pt", length: 10000000}, // 10MB
		{name: "logs/output.log", length: 50000},
	}

	archive := new(bytes.Buffer)
	gzw := gzip.NewWriter(archive)
	tarw := tar.NewWriter(gzw)

	for idx := range entries {
		e := entries[idx]

		payload := make([]byte, e.length)
		for off := range payload {
			payload[off] = byte(off % 256)
		}

		header := tar.Header{
			Name: e.name,
			Size: int64(e.length),
			Mode: 0640,
		}
		if err := tarw.WriteHeader(&header); err != nil {
			return err
		}
		if _, err := tarw.Write(payload); err != nil {
			return err
		}
	}

	// Close the tar writer before the gzip writer: the tar trailer has to
	// pass through the compressor before the gzip stream is finalized.
	if err := tarw.Close(); err != nil {
		return err
	}
	if err := gzw.Close(); err != nil {
		return err
	}

	return os.WriteFile(path, archive.Bytes(), 0640)
}
|
|
|
|
// BenchmarkScanArtifacts profiles the artifact scanning hot path.
|
|
// Uses filepath.WalkDir with repeated d.Info() syscalls.
|
|
// Tier 1A C++ candidate: fts(3) traversal, mmap manifest building.
|
|
func BenchmarkScanArtifacts(b *testing.B) {
|
|
runDir := b.TempDir()
|
|
|
|
// Create realistic run directory structure
|
|
files := []struct {
|
|
path string
|
|
size int
|
|
}{
|
|
{"run_manifest.json", 100},
|
|
{"output.log", 1000},
|
|
{"code/train.py", 5000},
|
|
{"snapshot/model.pt", 100000},
|
|
{"results/metrics.jsonl", 50000},
|
|
{"results/history.csv", 200000},
|
|
{"checkpoints/best.pt", 50000000},
|
|
{"checkpoints/epoch_10.pt", 25000000},
|
|
{"plots/loss.png", 50000},
|
|
{"plots/accuracy.png", 50000},
|
|
}
|
|
|
|
for _, f := range files {
|
|
p := filepath.Join(runDir, f.path)
|
|
if err := os.MkdirAll(filepath.Dir(p), 0750); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
data := make([]byte, f.size)
|
|
for i := range data {
|
|
data[i] = byte(i % 256)
|
|
}
|
|
if err := os.WriteFile(p, data, 0640); err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
|
|
b.ResetTimer()
|
|
b.ReportAllocs()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
_, err := worker.ScanArtifacts(runDir, false, nil)
|
|
if err != nil {
|
|
b.Fatal(err)
|
|
}
|
|
}
|
|
}
|