fetch_ml/internal/worker/native_bridge_libs.go
Jeremie Fraeys 9434f4c8e6
feat(security): Artifact ingestion caps enforcement
Add MaxArtifactFiles and MaxArtifactTotalBytes to SandboxConfig:
- Default MaxArtifactFiles: 10,000 (configurable via SecurityDefaults)
- Default MaxArtifactTotalBytes: 100GB (configurable via SecurityDefaults)
- ApplySecurityDefaults() sets defaults if not specified

Enforce caps in scanArtifacts() during directory walk:
- Returns error immediately when MaxArtifactFiles exceeded
- Returns error immediately when MaxArtifactTotalBytes exceeded
- Prevents resource exhaustion attacks from malicious artifact trees

Update all call sites to pass SandboxConfig for cap enforcement:
- Native bridge libs updated to pass caps argument
- Benchmark tests updated with nil caps (unlimited for benchmarks)
- Unit tests updated with nil caps

Closes: artifact ingestion caps items from security plan
2026-02-23 19:43:28 -05:00

80 lines
2 KiB
Go

//go:build cgo && native_libs
// +build cgo,native_libs
package worker
// #cgo darwin LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash
// #cgo linux LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash -lnvml_gpu -lnvidia-ml
// #include "../../native/dataset_hash/dataset_hash.h"
// #include <stdlib.h>
import "C"
import (
"errors"
"log"
"runtime"
"sync"
"time"
"unsafe"
"github.com/jfraeys/fetch_ml/internal/manifest"
)
// Package-level state backing the lazily created native hash context.
var (
	// hashCtxOnce guards the one-time initialization done in getHashContext.
	hashCtxOnce sync.Once
	// hashCtx is the context returned by fh_init; it stays nil until
	// getHashContext has run for the first time.
	hashCtx *C.fh_context_t
	// ctxInitTime is the time captured right after fh_init returned.
	ctxInitTime time.Time
)
// getHashContext returns the process-wide native hash context, creating it on
// the first call through fh_init with one thread per logical CPU. Subsequent
// calls reuse the same context, so only the first call pays the C++
// initialization cost (5-20ms).
//
// The returned pointer is nil if fh_init returned NULL. Initialization is
// guarded by sync.Once and is therefore never retried; callers should check
// for nil before passing the context to any C function.
func getHashContext() *C.fh_context_t {
	hashCtxOnce.Do(func() {
		threads := runtime.NumCPU()
		start := time.Now()
		hashCtx = C.fh_init(C.uint32_t(threads))
		// Recorded for both outcomes: marks when the one-time attempt finished.
		ctxInitTime = time.Now()
		if hashCtx == nil {
			// Do not report success for a context that was never created.
			log.Printf("[native] hash context initialization failed after %v (threads: %d)",
				time.Since(start), threads)
			return
		}
		log.Printf("[native] hash context initialized: %v (threads: %d)",
			time.Since(start), threads)
	})
	return hashCtx
}
// dirOverallSHA256HexNative hashes the directory at root using the native
// library's fh_hash_directory_combined and returns the string it produces.
// NOTE(review): the name implies a hex-encoded SHA-256 digest of the whole
// tree — confirm against the dataset_hash header.
//
// An error is returned when the native context is unavailable or when the
// native call yields NULL. In the latter case the message comes from
// fh_last_error when it provides one, and is a generic message otherwise.
func dirOverallSHA256HexNative(root string) (string, error) {
	ctx := getHashContext()
	if ctx == nil {
		// A NULL context handed to C may be dereferenced there, and a fault
		// inside C cannot be recovered by Go; fail with an ordinary error.
		return "", errors.New("native hash context unavailable")
	}

	// C.CString allocates with malloc, so the copy must be freed explicitly.
	croot := C.CString(root)
	defer C.free(unsafe.Pointer(croot))

	result := C.fh_hash_directory_combined(ctx, croot)
	if result == nil {
		if msg := C.fh_last_error(ctx); msg != nil {
			return "", errors.New(C.GoString(msg))
		}
		return "", errors.New("native hash failed")
	}
	// The result string is released through the library's own deallocator
	// once its contents have been copied into a Go string.
	defer C.fh_free_string(result)

	return C.GoString(result), nil
}
// GetSIMDImplName returns the string reported by the native library's
// fh_get_simd_impl_name, copied into a Go string.
// NOTE(review): presumably the name of the SIMD implementation the native
// hasher selected — confirm against the dataset_hash header.
func GetSIMDImplName() string {
	implName := C.fh_get_simd_impl_name()
	return C.GoString(implName)
}
// HasSIMDSHA256 reports whether the native library's fh_has_simd_sha256
// returned 1; any other value yields false.
func HasSIMDSHA256() bool {
	flag := C.fh_has_simd_sha256()
	return flag == 1
}
// ScanArtifactsNative scans runDir by delegating to ScanArtifacts with the
// second argument false and the third argument nil, returning its results
// unchanged.
//
// NOTE(review): the third argument appears to be the artifact caps
// configuration, with nil meaning "no limits" — confirm that running the
// native path without caps is intended.
func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) {
	artifacts, err := ScanArtifacts(runDir, false, nil)
	return artifacts, err
}
// ExtractTarGzNative delegates to ExtractTarGz with archivePath and dstDir and
// returns whatever error that call reports.
func ExtractTarGzNative(archivePath, dstDir string) error {
	err := ExtractTarGz(archivePath, dstDir)
	return err
}
// DirOverallSHA256HexNative is the exported entry point to the native
// directory hash, provided so benchmarks can call it. It forwards root to
// dirOverallSHA256HexNative and returns that function's result and error
// unchanged.
func DirOverallSHA256HexNative(root string) (string, error) {
	digest, err := dirOverallSHA256HexNative(root)
	return digest, err
}