// Package integrity provides data integrity and hashing utilities package integrity import ( "crypto/sha256" "encoding/hex" "fmt" "io" "os" "path/filepath" "runtime" "sort" "strings" "sync" ) // FileSHA256Hex computes SHA256 hash of a single file func FileSHA256Hex(path string) (string, error) { f, err := os.Open(filepath.Clean(path)) if err != nil { return "", err } defer func() { _ = f.Close() }() h := sha256.New() if _, err := io.Copy(h, f); err != nil { return "", err } return fmt.Sprintf("%x", h.Sum(nil)), nil } // NormalizeSHA256ChecksumHex normalizes a SHA256 checksum string func NormalizeSHA256ChecksumHex(checksum string) (string, error) { checksum = strings.TrimSpace(checksum) checksum = strings.TrimPrefix(checksum, "sha256:") checksum = strings.TrimPrefix(checksum, "SHA256:") checksum = strings.TrimSpace(checksum) if checksum == "" { return "", nil } if len(checksum) != 64 { return "", fmt.Errorf("expected sha256 hex length 64, got %d", len(checksum)) } if _, err := hex.DecodeString(checksum); err != nil { return "", fmt.Errorf("invalid sha256 hex: %w", err) } return strings.ToLower(checksum), nil } // DirOverallSHA256Hex computes overall SHA256 of directory contents func DirOverallSHA256Hex(root string) (string, error) { root = filepath.Clean(root) info, err := os.Stat(root) if err != nil { return "", err } if !info.IsDir() { return "", fmt.Errorf("not a directory") } var files []string err = filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error { if walkErr != nil { return walkErr } if d.IsDir() { return nil } rel, err := filepath.Rel(root, path) if err != nil { return err } files = append(files, rel) return nil }) if err != nil { return "", err } // Deterministic order sort.Strings(files) // Hash file hashes to avoid holding all bytes overall := sha256.New() for _, rel := range files { p := filepath.Join(root, rel) sum, err := FileSHA256Hex(p) if err != nil { return "", err } overall.Write([]byte(sum)) } return fmt.Sprintf("%x", overall.Sum(nil)), nil } // DirOverallSHA256HexParallel computes directory hash using parallel workers func DirOverallSHA256HexParallel(root string) (string, error) { root = filepath.Clean(root) info, err := os.Stat(root) if err != nil { return "", err } if !info.IsDir() { return "", fmt.Errorf("not a directory") } // Collect all files with size info var files []string var totalSize int64 err = filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error { if walkErr != nil { return walkErr } if d.IsDir() { return nil } rel, err := filepath.Rel(root, path) if err != nil { return err } files = append(files, rel) // Track total size for optimization decisions if info, err := d.Info(); err == nil { totalSize += info.Size() } return nil }) if err != nil { return "", err } // Sort for deterministic order sort.Strings(files) // Parallel hashing with worker pool numWorkers := runtime.NumCPU() if numWorkers > 8 { numWorkers = 8 } type result struct { index int hash string err error } workCh := make(chan int, len(files)) resultCh := make(chan result, len(files)) var wg sync.WaitGroup for i := 0; i < numWorkers; i++ { wg.Add(1) go func() { defer wg.Done() for idx := range workCh { rel := files[idx] p := filepath.Join(root, rel) hash, err := FileSHA256Hex(p) resultCh <- result{index: idx, hash: hash, err: err} } }() } go func() { for i := range files { workCh <- i } close(workCh) }() go func() { wg.Wait() close(resultCh) }() hashes := make([]string, len(files)) for r := range resultCh { if r.err != nil { return "", r.err } hashes[r.index] = r.hash } // Combine hashes deterministically overall := sha256.New() for _, h := range hashes { overall.Write([]byte(h)) } return fmt.Sprintf("%x", overall.Sum(nil)), nil } // EstimateDirSize returns total size of directory contents in bytes func EstimateDirSize(root string) (int64, error) { root = filepath.Clean(root) info, err := os.Stat(root) if err != nil { return 0, err } if !info.IsDir() { return info.Size(), nil } var totalSize int64 err = filepath.WalkDir(root, func(path string, d os.DirEntry, walkErr error) error { if walkErr != nil { return walkErr } if d.IsDir() { return nil } if info, err := d.Info(); err == nil { totalSize += info.Size() } return nil }) return totalSize, err }