fetch_ml/internal/envpool/envpool.go

288 lines
6.1 KiB
Go

package envpool
import (
"context"
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"os/exec"
"strings"
"sync"
"time"
)
// CommandRunner abstracts running an external command so that Pool can be
// pointed at a substitute implementation instead of spawning real processes.
type CommandRunner interface {
	// CombinedOutput runs name with args under ctx and returns the command's
	// output together with any error from running it.
	CombinedOutput(ctx context.Context, name string, args ...string) ([]byte, error)
}
// execRunner is the default CommandRunner; it launches real processes through
// os/exec.
type execRunner struct{}

// CombinedOutput starts name with args, bound to ctx, and returns the
// command's stdout and stderr as a single byte slice along with the run error.
func (execRunner) CombinedOutput(ctx context.Context, name string, args ...string) ([]byte, error) {
	return exec.CommandContext(ctx, name, args...).CombinedOutput()
}
// Pool drives podman to manage prewarmed environment images: it derives image
// tags, checks (and caches) whether an image exists, builds missing images and
// prunes old ones.
type Pool struct {
	// runner executes the podman commands.
	runner CommandRunner
	// imagePrefix is the part of a warm image tag that precedes the colon.
	imagePrefix string
	// cacheMu guards cache.
	cacheMu sync.Mutex
	// cache remembers recent ImageExists answers, keyed by image reference.
	cache map[string]cacheEntry
	// cacheTTL is how long a cached answer stays valid.
	cacheTTL time.Duration
}
// cacheEntry is one remembered image-existence answer.
type cacheEntry struct {
	exists  bool      // the cached answer
	expires time.Time // instant from which the entry is no longer used
}
// New returns a Pool that runs real podman commands, has an empty existence
// cache and uses a 30 second cache TTL.
//
// imagePrefix is whitespace-trimmed; when nothing remains, the prefix
// "fetchml-prewarm" is used instead.
func New(imagePrefix string) *Pool {
	pool := &Pool{
		runner:      execRunner{},
		imagePrefix: "fetchml-prewarm",
		cache:       make(map[string]cacheEntry),
		cacheTTL:    30 * time.Second,
	}
	if trimmed := strings.TrimSpace(imagePrefix); trimmed != "" {
		pool.imagePrefix = trimmed
	}
	return pool
}
// WithRunner swaps the command runner used for podman invocations and returns
// p so calls can be chained. A nil r leaves the current runner untouched.
func (p *Pool) WithRunner(r CommandRunner) *Pool {
	if r == nil {
		return p
	}
	p.runner = r
	return p
}
// WithCacheTTL sets how long image-existence answers are cached and returns p
// so calls can be chained. A zero or negative ttl is ignored.
func (p *Pool) WithCacheTTL(ttl time.Duration) *Pool {
	if ttl <= 0 {
		return p
	}
	p.cacheTTL = ttl
	return p
}
// WarmImageTag derives the prewarm image reference for a dependency manifest.
//
// depsManifestSHA256 is whitespace-trimmed and must then be exactly 64
// lower-case hexadecimal characters. The result has the form
// "<imagePrefix>:<first 12 characters of the digest>".
func (p *Pool) WarmImageTag(depsManifestSHA256 string) (string, error) {
	digest := strings.TrimSpace(depsManifestSHA256)
	switch {
	case digest == "":
		return "", fmt.Errorf("missing deps sha256")
	case !isLowerHexLen(digest, 64):
		return "", fmt.Errorf("invalid deps sha256")
	}
	// Only a short digest prefix goes into the tag.
	const tagLen = 12
	return p.imagePrefix + ":" + digest[:tagLen], nil
}
// ImageExists reports whether imageRef is known to podman.
//
// imageRef is whitespace-trimmed and must not be empty. Answers, both positive
// and negative, are cached for the pool's cache TTL; a cache hit returns
// without running podman. On a miss, `podman image inspect` decides:
//
//   - success means the image exists;
//   - output that looks like a "not found" message means it does not;
//   - any other non-zero podman exit is also treated as "does not exist";
//   - failures to run podman at all are returned as errors.
//
// If ctx is cancelled or past its deadline when the command fails, ctx.Err()
// is returned and nothing is cached: the failure then says nothing about the
// image, and caching it would report the image as missing for a whole TTL.
func (p *Pool) ImageExists(ctx context.Context, imageRef string) (bool, error) {
	ref := strings.TrimSpace(imageRef)
	if ref == "" {
		return false, fmt.Errorf("missing image ref")
	}

	// Copy the entry out so the lock is not held while podman runs.
	p.cacheMu.Lock()
	ent, cached := p.cache[ref]
	p.cacheMu.Unlock()
	if cached && time.Now().Before(ent.expires) {
		return ent.exists, nil
	}

	out, err := p.runner.CombinedOutput(ctx, "podman", "image", "inspect", ref)
	if err == nil {
		p.setCache(ref, true)
		return true, nil
	}
	if looksLikeImageNotFound(out) {
		p.setCache(ref, false)
		return false, nil
	}
	// A command killed because ctx ended also reports an ExitError; it must
	// not be mistaken for (and cached as) a missing image.
	if ctxErr := ctx.Err(); ctxErr != nil {
		return false, ctxErr
	}
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		// podman ran and exited non-zero without a recognisable message.
		p.setCache(ref, false)
		return false, nil
	}
	return false, err
}
// looksLikeImageNotFound reports whether command output reads like a
// "missing image" message. Matching is case-insensitive and looks for the
// phrases "no such", "not found" or "does not exist"; blank output never
// matches.
func looksLikeImageNotFound(out []byte) bool {
	msg := strings.ToLower(strings.TrimSpace(string(out)))
	for _, marker := range []string{"no such", "not found", "does not exist"} {
		if strings.Contains(msg, marker) {
			return true
		}
	}
	return false
}
// setCache stores the existence answer for imageRef, valid for the pool's
// cache TTL counted from now.
func (p *Pool) setCache(imageRef string, exists bool) {
	p.cacheMu.Lock()
	defer p.cacheMu.Unlock()

	deadline := time.Now().Add(p.cacheTTL)
	p.cache[imageRef] = cacheEntry{
		exists:  exists,
		expires: deadline,
	}
}
// PrepareRequest describes one prewarm build for Pool.Prepare. Every field is
// required.
type PrepareRequest struct {
	// BaseImage is the image that is run to prepare the environment.
	BaseImage string
	// TargetImage is the reference under which the prepared container is
	// committed.
	TargetImage string
	// HostWorkspace is the host path bind-mounted into the container.
	HostWorkspace string
	// ContainerWorkspace is where HostWorkspace is mounted in the container.
	ContainerWorkspace string
	// DepsPathInContainer is the dependency path handed to the container; it
	// has to be located under ContainerWorkspace.
	DepsPathInContainer string
}
// PruneImages asks podman to prune images labelled fetchml.prewarm=true that
// are older than olderThan.
//
// It runs `podman image prune -a -f` with a label filter and an "until"
// filter. The age is passed in whole hours and rounded up, so the cutoff is
// never younger than the caller requested (rounding to the nearest hour would
// turn 1h20m into 1h and prune images that are not yet old enough).
//
// olderThan must be positive. On failure the returned error wraps the
// runner's error and includes a truncated copy of podman's output.
func (p *Pool) PruneImages(ctx context.Context, olderThan time.Duration) error {
	if olderThan <= 0 {
		return fmt.Errorf("invalid olderThan")
	}

	// Ceiling division; olderThan > 0 guarantees hours >= 1.
	hours := int64(olderThan / time.Hour)
	if olderThan%time.Hour != 0 {
		hours++
	}

	args := []string{
		"image",
		"prune",
		"-a",
		"-f",
		"--filter",
		"label=fetchml.prewarm=true",
		"--filter",
		fmt.Sprintf("until=%dh", hours),
	}
	out, err := p.runner.CombinedOutput(ctx, "podman", args...)
	if err != nil {
		return fmt.Errorf("podman image prune failed: %w", scrubOutput(out, err))
	}
	return nil
}
// Prepare builds the prewarmed image described by req unless it already
// exists.
//
// All request fields are whitespace-trimmed and required, and
// DepsPathInContainer must be located inside ContainerWorkspace. When the
// target image is missing, Prepare runs BaseImage in a uniquely named
// container with HostWorkspace bind-mounted read-write at ContainerWorkspace
// and the arguments --workspace, --deps and --prepare-only. It then commits
// the container as TargetImage with the label fetchml.prewarm=true (the label
// PruneImages filters on) and records the image as present in the existence
// cache. The temporary container is removed on every exit path.
//
// Failures of `podman run` or `podman commit` are returned wrapped, with a
// truncated copy of the command output attached.
func (p *Pool) Prepare(ctx context.Context, req PrepareRequest) error {
	baseImage := strings.TrimSpace(req.BaseImage)
	targetImage := strings.TrimSpace(req.TargetImage)
	hostWS := strings.TrimSpace(req.HostWorkspace)
	containerWS := strings.TrimSpace(req.ContainerWorkspace)
	depsInContainer := strings.TrimSpace(req.DepsPathInContainer)

	switch {
	case baseImage == "":
		return fmt.Errorf("missing base image")
	case targetImage == "":
		return fmt.Errorf("missing target image")
	case hostWS == "":
		return fmt.Errorf("missing host workspace")
	case containerWS == "":
		return fmt.Errorf("missing container workspace")
	case depsInContainer == "":
		return fmt.Errorf("missing deps path")
	case !pathWithinDir(containerWS, depsInContainer):
		return fmt.Errorf("deps path must be under container workspace")
	}

	exists, err := p.ImageExists(ctx, targetImage)
	if err != nil {
		return err
	}
	if exists {
		return nil
	}

	containerName, err := randomContainerName("fetchml-prewarm")
	if err != nil {
		return err
	}
	// The container only exists so it can be committed; remove it however
	// this function returns. Removal is best-effort (its error is dropped on
	// purpose) and uses a fresh context so it still runs when ctx has already
	// been cancelled.
	defer func() {
		_ = p.cleanupContainer(context.Background(), containerName)
	}()

	// Do not use --rm since we need a container to commit.
	runArgs := []string{
		"run",
		"--name", containerName,
		"--security-opt", "no-new-privileges",
		"--cap-drop", "ALL",
		"--userns", "keep-id",
		"-v", fmt.Sprintf("%s:%s:rw", hostWS, containerWS),
		baseImage,
		"--workspace", containerWS,
		"--deps", depsInContainer,
		"--prepare-only",
	}
	if out, err := p.runner.CombinedOutput(ctx, "podman", runArgs...); err != nil {
		return fmt.Errorf("podman run prewarm failed: %w", scrubOutput(out, err))
	}

	// Attach the label while committing: podman has no separate command for
	// labelling an existing image, so this is the only point at which the
	// image can receive the label that pruning filters on.
	commitArgs := []string{
		"commit",
		"--change", "LABEL fetchml.prewarm=true",
		containerName,
		targetImage,
	}
	if out, err := p.runner.CombinedOutput(ctx, "podman", commitArgs...); err != nil {
		return fmt.Errorf("podman commit prewarm failed: %w", scrubOutput(out, err))
	}

	p.setCache(targetImage, true)
	return nil
}

// pathWithinDir reports whether the slash-separated path candidate is dir
// itself or is located beneath it. The prefix is matched on a whole path
// segment, so "/ws-other/x" is not inside "/ws", and ".." segments after the
// prefix may not climb back above dir.
func pathWithinDir(dir, candidate string) bool {
	rest := ""
	if candidate != dir {
		prefix := strings.TrimSuffix(dir, "/") + "/"
		if !strings.HasPrefix(candidate, prefix) {
			return false
		}
		rest = candidate[len(prefix):]
	}

	// Track how deep below dir the remaining segments lead.
	depth := 0
	for _, seg := range strings.Split(rest, "/") {
		switch seg {
		case "", ".":
			// No movement.
		case "..":
			depth--
			if depth < 0 {
				return false
			}
		default:
			depth++
		}
	}
	return true
}
// cleanupContainer force-removes the named container with `podman rm -f` and
// returns the runner's error, if any.
//
// name is whitespace-trimmed; an empty name is a no-op. Removal is forced
// because a prewarm run that was interrupted (for example by context
// cancellation) may leave the container running, and a plain `podman rm`
// does not remove running containers, which would leak the container.
func (p *Pool) cleanupContainer(ctx context.Context, name string) error {
	n := strings.TrimSpace(name)
	if n == "" {
		return nil
	}
	_, err := p.runner.CombinedOutput(ctx, "podman", "rm", "-f", n)
	return err
}
// randomContainerName returns "<prefix>-<12 hex characters>", where the hex
// part encodes 6 bytes read from crypto/rand. prefix is whitespace-trimmed and
// falls back to "fetchml-prewarm" when empty. An error is returned only when
// reading random bytes fails.
func randomContainerName(prefix string) (string, error) {
	base := strings.TrimSpace(prefix)
	if base == "" {
		base = "fetchml-prewarm"
	}

	var suffix [6]byte
	if _, err := rand.Read(suffix[:]); err != nil {
		return "", err
	}
	return base + "-" + hex.EncodeToString(suffix[:]), nil
}
// isLowerHexLen reports whether s is exactly want bytes long and consists
// solely of the characters 0-9 and a-f.
func isLowerHexLen(s string, want int) bool {
	if len(s) != want {
		return false
	}
	for _, r := range s {
		switch {
		case '0' <= r && r <= '9':
		case 'a' <= r && r <= 'f':
		default:
			// Upper-case digits, other ASCII and any non-ASCII input end here.
			return false
		}
	}
	return true
}
// scrubOutput attaches a trimmed, length-limited copy of command output to
// err.
//
// When out is empty or contains only whitespace there is nothing useful to
// add, and err is returned unchanged. Otherwise the result wraps err (so
// errors.Is / errors.As still see it) and appends the output, quoted, limited
// to 400 bytes. The cut is moved back to the start of a UTF-8 sequence so a
// multi-byte character is never split.
func scrubOutput(out []byte, err error) error {
	s := strings.TrimSpace(string(out))
	if s == "" {
		return err
	}

	const maxOutputLen = 400
	if len(s) > maxOutputLen {
		cut := maxOutputLen
		// Continuation bytes look like 10xxxxxx; a sequence has at most three
		// of them, so at most three steps back reach its first byte.
		for back := 0; back < 3 && s[cut]&0xC0 == 0x80; back++ {
			cut--
		}
		s = s[:cut]
	}
	return fmt.Errorf("%w (output=%q)", err, s)
}