Update worker system for scheduler integration: - Worker server with scheduler registration - Configuration with scheduler endpoint support - Artifact handling with integrity verification - Container executor with supply chain validation - Local executor enhancements - GPU detection improvements (cross-platform) - Error handling with execution context - Factory pattern for executor instantiation - Hash integrity with native library support
218 lines
4.5 KiB
Go
218 lines
4.5 KiB
Go
// Package integrity provides SHA-256 hashing and checksum-normalization utilities for files and directory trees.
|
|
package integrity
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
)
|
|
|
|
// FileSHA256Hex streams the file at path through SHA-256 and returns the
// digest as a lowercase hexadecimal string.
//
// The path is cleaned with filepath.Clean before it is opened. Errors from
// opening or reading the file are returned unchanged together with an empty
// string.
func FileSHA256Hex(path string) (string, error) {
	file, openErr := os.Open(filepath.Clean(path))
	if openErr != nil {
		return "", openErr
	}
	// The handle is only read from, so a failed Close cannot lose data and
	// is deliberately ignored.
	defer func() { _ = file.Close() }()

	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, file)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	return fmt.Sprintf("%x", digest), nil
}
|
|
|
|
// NormalizeSHA256ChecksumHex canonicalizes a textual SHA-256 checksum.
//
// Surrounding whitespace and an optional leading "sha256:" or "SHA256:"
// prefix are stripped. If nothing remains, the result is ("", nil).
// Otherwise the remainder must be exactly 64 hexadecimal characters and is
// returned lowercased; any other input yields an error.
func NormalizeSHA256ChecksumHex(checksum string) (string, error) {
	digest := strings.TrimSpace(checksum)
	for _, prefix := range []string{"sha256:", "SHA256:"} {
		digest = strings.TrimPrefix(digest, prefix)
	}
	// Whitespace may also follow the prefix, e.g. "sha256: <hex>".
	digest = strings.TrimSpace(digest)

	switch n := len(digest); {
	case n == 0:
		return "", nil
	case n != 64:
		return "", fmt.Errorf("expected sha256 hex length 64, got %d", n)
	}

	if _, decodeErr := hex.DecodeString(digest); decodeErr != nil {
		return "", fmt.Errorf("invalid sha256 hex: %w", decodeErr)
	}
	return strings.ToLower(digest), nil
}
|
|
|
|
// DirOverallSHA256Hex computes overall SHA256 of directory contents
|
|
func DirOverallSHA256Hex(root string) (string, error) {
|
|
root = filepath.Clean(root)
|
|
info, err := os.Stat(root)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if !info.IsDir() {
|
|
return "", fmt.Errorf("not a directory")
|
|
}
|
|
|
|
var files []string
|
|
err = filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error {
|
|
if walkErr != nil {
|
|
return walkErr
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
rel, err := filepath.Rel(root, path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
files = append(files, rel)
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Deterministic order
|
|
sort.Strings(files)
|
|
|
|
// Hash file hashes to avoid holding all bytes
|
|
overall := sha256.New()
|
|
for _, rel := range files {
|
|
p := filepath.Join(root, rel)
|
|
sum, err := FileSHA256Hex(p)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
overall.Write([]byte(sum))
|
|
}
|
|
return fmt.Sprintf("%x", overall.Sum(nil)), nil
|
|
}
|
|
|
|
// DirOverallSHA256HexParallel computes directory hash using parallel workers
|
|
func DirOverallSHA256HexParallel(root string) (string, error) {
|
|
root = filepath.Clean(root)
|
|
info, err := os.Stat(root)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if !info.IsDir() {
|
|
return "", fmt.Errorf("not a directory")
|
|
}
|
|
|
|
// Collect all files with size info
|
|
var files []string
|
|
var totalSize int64
|
|
err = filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error {
|
|
if walkErr != nil {
|
|
return walkErr
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
rel, err := filepath.Rel(root, path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
files = append(files, rel)
|
|
|
|
// Track total size for optimization decisions
|
|
if info, err := d.Info(); err == nil {
|
|
totalSize += info.Size()
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
// Sort for deterministic order
|
|
sort.Strings(files)
|
|
|
|
// Parallel hashing with worker pool
|
|
numWorkers := runtime.NumCPU()
|
|
if numWorkers > 8 {
|
|
numWorkers = 8
|
|
}
|
|
|
|
type result struct {
|
|
err error
|
|
hash string
|
|
index int
|
|
}
|
|
|
|
workCh := make(chan int, len(files))
|
|
resultCh := make(chan result, len(files))
|
|
|
|
var wg sync.WaitGroup
|
|
for i := 0; i < numWorkers; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
for idx := range workCh {
|
|
rel := files[idx]
|
|
p := filepath.Join(root, rel)
|
|
hash, err := FileSHA256Hex(p)
|
|
resultCh <- result{index: idx, hash: hash, err: err}
|
|
}
|
|
}()
|
|
}
|
|
|
|
go func() {
|
|
for i := range files {
|
|
workCh <- i
|
|
}
|
|
close(workCh)
|
|
}()
|
|
|
|
go func() {
|
|
wg.Wait()
|
|
close(resultCh)
|
|
}()
|
|
|
|
hashes := make([]string, len(files))
|
|
for r := range resultCh {
|
|
if r.err != nil {
|
|
return "", r.err
|
|
}
|
|
hashes[r.index] = r.hash
|
|
}
|
|
|
|
// Combine hashes deterministically
|
|
overall := sha256.New()
|
|
for _, h := range hashes {
|
|
overall.Write([]byte(h))
|
|
}
|
|
return fmt.Sprintf("%x", overall.Sum(nil)), nil
|
|
}
|
|
|
|
// EstimateDirSize reports, in bytes, how much data is stored at root.
//
// If root is not a directory, its own size is returned. Otherwise the sizes
// of all non-directory entries beneath root are added up. Entries whose
// metadata cannot be read are skipped without error, so the result is a
// best-effort estimate. A failure to stat root yields (0, err); if the walk
// itself fails, the total accumulated so far is returned with the error.
func EstimateDirSize(root string) (int64, error) {
	root = filepath.Clean(root)
	rootInfo, statErr := os.Stat(root)
	if statErr != nil {
		return 0, statErr
	}
	if !rootInfo.IsDir() {
		return rootInfo.Size(), nil
	}

	var sum int64
	walkErr := filepath.WalkDir(root, func(_ string, entry os.DirEntry, visitErr error) error {
		switch {
		case visitErr != nil:
			return visitErr
		case entry.IsDir():
			return nil
		}
		entryInfo, infoErr := entry.Info()
		if infoErr != nil {
			// Best effort: an entry whose metadata is unavailable simply
			// does not contribute to the total.
			return nil
		}
		sum += entryInfo.Size()
		return nil
	})
	return sum, walkErr
}
|