fetch_ml/internal/worker/native_bridge_libs.go
Jeremie Fraeys e557313e08
fix: context reuse benchmark uses temp directory
- Replace hardcoded testdata path with b.TempDir()
- Add createSmallDataset helper for self-contained benchmarks
- Fixes FAIL: BenchmarkContextReuse / BenchmarkSequentialHashes
2026-02-21 14:38:00 -05:00

85 lines
2.3 KiB
Go

//go:build cgo && native_libs
// +build cgo,native_libs
package worker
// #cgo LDFLAGS: -L${SRCDIR}/../../native/build -Wl,-rpath,${SRCDIR}/../../native/build -ldataset_hash
// #include "../../native/dataset_hash/dataset_hash.h"
// #include <stdlib.h>
import "C"
import (
"errors"
"log"
"runtime"
"sync"
"time"
"unsafe"
"github.com/jfraeys/fetch_ml/internal/manifest"
)
// Package-level state backing the shared native hash context.
var (
// hashCtx holds the handle returned by fh_init; it is assigned inside
// getHashContext's one-time initializer.
hashCtx *C.fh_context_t
// hashCtxOnce makes the fh_init call in getHashContext run a single time.
hashCtxOnce sync.Once
// ctxInitTime is the time captured immediately after fh_init returned.
ctxInitTime time.Time
)
// getHashContext hands back the process-wide native hash context.
//
// The first call creates the context through fh_init, sized to the CPU count
// reported by runtime.NumCPU, records the completion time in ctxInitTime, and
// logs how long the setup took. Every later call returns the same pointer
// without re-initializing, which avoids the 5-20ms of thread pool creation
// that a fresh context would cost per hash operation.
func getHashContext() *C.fh_context_t {
	setup := func() {
		threads := runtime.NumCPU()
		began := time.Now()

		hashCtx = C.fh_init(C.uint32_t(threads))
		ctxInitTime = time.Now()

		log.Printf("[native] hash context initialized: %v (threads: %d)",
			time.Since(began), threads)
	}
	hashCtxOnce.Do(setup)

	return hashCtx
}
// dirOverallSHA256HexNative hashes the directory rooted at root using the
// native dataset_hash library and returns the string produced by
// fh_hash_directory_combined (presumably a hex-encoded SHA-256 digest, per the
// function name).
//
// The shared context from getHashContext is reused across calls. An error is
// returned when that context is nil, or when the native call yields no
// result; in the latter case the message reported by fh_last_error is used
// when one is available, otherwise a generic "native hash failed" error.
func dirOverallSHA256HexNative(root string) (string, error) {
	ctx := getHashContext() // Reuse cached context: ~0.1μs vs 5-20ms
	if ctx == nil {
		// getHashContext returns whatever fh_init produced without checking
		// it. A nil handle is assumed to mean initialization failed, so
		// report it here instead of passing NULL into the C library.
		return "", errors.New("native hash context initialization failed")
	}

	// C.CString allocates with malloc; release it once the call is done.
	croot := C.CString(root)
	defer C.free(unsafe.Pointer(croot))

	result := C.fh_hash_directory_combined(ctx, croot)
	if result == nil {
		if msg := C.fh_last_error(ctx); msg != nil {
			return "", errors.New(C.GoString(msg))
		}
		return "", errors.New("native hash failed")
	}
	// The result string is owned by the native library until freed here;
	// C.GoString below copies it into Go memory before the deferred free runs.
	defer C.fh_free_string(result)

	return C.GoString(result), nil
}
// GetSIMDImplName reports the implementation name returned by the native
// library's fh_get_simd_impl_name, copied into a Go string.
func GetSIMDImplName() string {
	implName := C.fh_get_simd_impl_name()
	return C.GoString(implName)
}
// HasSIMDSHA256 reports whether the native library advertises SIMD SHA256
// support, i.e. whether fh_has_simd_sha256 returns exactly 1.
func HasSIMDSHA256() bool {
	supported := C.fh_has_simd_sha256()
	return supported == 1
}
// ScanArtifactsNative has no native implementation in this build; it simply
// delegates to the Go ScanArtifacts and returns its results unchanged.
func ScanArtifactsNative(runDir string) (*manifest.Artifacts, error) {
	artifacts, err := ScanArtifacts(runDir)
	return artifacts, err
}
// ExtractTarGzNative has no native implementation in this build; it simply
// delegates to the Go ExtractTarGz and returns its error unchanged.
func ExtractTarGzNative(archivePath, dstDir string) error {
	err := ExtractTarGz(archivePath, dstDir)
	return err
}
// DirOverallSHA256HexNative is the exported entry point to the native
// directory hash, intended for benchmarks that need to call the native
// library explicitly when building with -tags native_libs. It forwards to
// dirOverallSHA256HexNative and returns its results unchanged.
func DirOverallSHA256HexNative(root string) (string, error) {
	digest, err := dirOverallSHA256HexNative(root)
	return digest, err
}