Add MaxArtifactFiles and MaxArtifactTotalBytes to SandboxConfig:
- Default MaxArtifactFiles: 10,000 (configurable via SecurityDefaults)
- Default MaxArtifactTotalBytes: 100GB (configurable via SecurityDefaults)
- ApplySecurityDefaults() sets defaults if not specified

Enforce caps in scanArtifacts() during directory walk:
- Returns error immediately when MaxArtifactFiles is exceeded
- Returns error immediately when MaxArtifactTotalBytes is exceeded
- Prevents resource exhaustion attacks from malicious artifact trees

Update all call sites to pass SandboxConfig for cap enforcement:
- Native bridge libs updated to pass caps argument
- Benchmark tests updated with nil caps (unlimited for benchmarks)
- Unit tests updated with nil caps

Closes: artifact ingestion caps items from security plan
80 lines
2 KiB
Go
80 lines
2 KiB
Go
//go:build cgo && native_libs
|
|
// +build cgo,native_libs
|
|
|
|
package worker
|
|
|
|
// #cgo darwin LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash
|
|
// #cgo linux LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash -lnvml_gpu -lnvidia-ml
|
|
// #include "../../native/dataset_hash/dataset_hash.h"
|
|
// #include <stdlib.h>
|
|
import "C"
|
|
|
|
import (
|
|
"errors"
|
|
"log"
|
|
"runtime"
|
|
"sync"
|
|
"time"
|
|
"unsafe"
|
|
|
|
"github.com/jfraeys/fetch_ml/internal/manifest"
|
|
)
|
|
|
|
var (
|
|
hashCtx *C.fh_context_t
|
|
hashCtxOnce sync.Once
|
|
ctxInitTime time.Time
|
|
)
|
|
|
|
// getHashContext returns the native hash context, initializing it on first call.
|
|
// First call initializes C++ context (5-20ms) - subsequent calls reuse context.
|
|
func getHashContext() *C.fh_context_t {
|
|
hashCtxOnce.Do(func() {
|
|
start := time.Now()
|
|
hashCtx = C.fh_init(C.uint32_t(runtime.NumCPU()))
|
|
ctxInitTime = time.Now()
|
|
log.Printf("[native] hash context initialized: %v (threads: %d)",
|
|
time.Since(start), runtime.NumCPU())
|
|
})
|
|
return hashCtx
|
|
}
|
|
|
|
func dirOverallSHA256HexNative(root string) (string, error) {
|
|
ctx := getHashContext()
|
|
|
|
croot := C.CString(root)
|
|
defer C.free(unsafe.Pointer(croot))
|
|
|
|
result := C.fh_hash_directory_combined(ctx, croot)
|
|
if result == nil {
|
|
err := C.fh_last_error(ctx)
|
|
if err != nil {
|
|
return "", errors.New(C.GoString(err))
|
|
}
|
|
return "", errors.New("native hash failed")
|
|
}
|
|
defer C.fh_free_string(result)
|
|
|
|
return C.GoString(result), nil
|
|
}
|
|
|
|
func GetSIMDImplName() string {
|
|
return C.GoString(C.fh_get_simd_impl_name())
|
|
}
|
|
|
|
func HasSIMDSHA256() bool {
|
|
return C.fh_has_simd_sha256() == 1
|
|
}
|
|
|
|
func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) {
|
|
return ScanArtifacts(runDir, false, nil)
|
|
}
|
|
|
|
func ExtractTarGzNative(archivePath, dstDir string) error {
|
|
return ExtractTarGz(archivePath, dstDir)
|
|
}
|
|
|
|
// DirOverallSHA256HexNative exports the native hash implementation for benchmarks.
|
|
func DirOverallSHA256HexNative(root string) (string, error) {
|
|
return dirOverallSHA256HexNative(root)
|
|
}
|