{
  "id": null,
  "title": "Worker Resources",
  "tags": ["worker", "resources"],
  "panels": [
    {
      "id": 1,
      "title": "CPU Free",
      "type": "stat",
      "targets": [
        {
          "expr": "fetchml_resources_cpu_free",
          "legendFormat": "{{worker_id}}"
        }
      ],
      "gridPos": { "h": 6, "w": 6, "x": 0, "y": 0 }
    },
    {
      "id": 2,
      "title": "CPU Total",
      "type": "stat",
      "targets": [
        {
          "expr": "fetchml_resources_cpu_total",
          "legendFormat": "{{worker_id}}"
        }
      ],
      "gridPos": { "h": 6, "w": 6, "x": 6, "y": 0 }
    },
    {
      "id": 3,
      "title": "CPU Utilization (%)",
      "type": "graph",
      "targets": [
        {
          "expr": "100 * (1 - (fetchml_resources_cpu_free / clamp_min(fetchml_resources_cpu_total, 1)))",
          "legendFormat": "{{worker_id}}"
        }
      ],
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }
    },
    {
      "id": 4,
      "title": "GPU Slots Free",
      "type": "graph",
      "targets": [
        {
          "expr": "fetchml_resources_gpu_slots_free",
          "legendFormat": "{{worker_id}} gpu={{gpu_index}}"
        }
      ],
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }
    },
    {
      "id": 5,
      "title": "Acquire Wait / Timeout (Totals)",
      "type": "graph",
      "targets": [
        {
          "expr": "fetchml_resources_acquire_wait_total",
          "legendFormat": "wait {{worker_id}}"
        },
        {
          "expr": "fetchml_resources_acquire_timeout_total",
          "legendFormat": "timeout {{worker_id}}"
        },
        {
          "expr": "fetchml_resources_acquire_total",
          "legendFormat": "total {{worker_id}}"
        }
      ],
      "gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 }
    },
    {
      "id": 6,
      "title": "Avg Acquire Wait (seconds)",
      "type": "stat",
      "targets": [
        {
          "expr": "fetchml_resources_acquire_wait_seconds_total / clamp_min(fetchml_resources_acquire_wait_total, 1)",
          "legendFormat": "{{worker_id}}"
        }
      ],
      "gridPos": { "h": 6, "w": 6, "x": 0, "y": 14 }
    },
    {
      "id": 7,
      "title": "Acquire Wait Ratio",
      "type": "stat",
      "targets": [
        {
          "expr": "fetchml_resources_acquire_wait_total / clamp_min(fetchml_resources_acquire_total, 1)",
          "legendFormat": "{{worker_id}}"
        }
      ],
      "gridPos": { "h": 6, "w": 6, "x": 6, "y": 14 }
    },
    {
      "id": 8,
      "title": "Environment Prewarm Hit Rate (%)",
      "type": "stat",
      "targets": [
        {
          "expr": "100 * (fetchml_prewarm_env_hit_total / clamp_min(fetchml_prewarm_env_hit_total + fetchml_prewarm_env_miss_total, 1))",
          "legendFormat": "{{worker_id}}"
        }
      ],
      "gridPos": { "h": 6, "w": 6, "x": 12, "y": 14 },
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "thresholds": {
            "steps": [
              { "color": "red", "value": 0 },
              { "color": "yellow", "value": 50 },
              { "color": "green", "value": 80 }
            ]
          }
        }
      }
    },
    {
      "id": 9,
      "title": "Snapshot Prewarm Hit Rate (%)",
      "type": "stat",
      "targets": [
        {
          "expr": "100 * (fetchml_prewarm_snapshot_hit_total / clamp_min(fetchml_prewarm_snapshot_hit_total + fetchml_prewarm_snapshot_miss_total, 1))",
          "legendFormat": "{{worker_id}}"
        }
      ],
      "gridPos": { "h": 6, "w": 6, "x": 18, "y": 14 },
      "fieldConfig": {
        "defaults": {
          "unit": "percent",
          "thresholds": {
            "steps": [
              { "color": "red", "value": 0 },
              { "color": "yellow", "value": 50 },
              { "color": "green", "value": 80 }
            ]
          }
        }
      }
    },
    {
      "id": 10,
      "title": "Prewarm Hits vs Misses",
      "type": "graph",
      "targets": [
        {
          "expr": "rate(fetchml_prewarm_env_hit_total[5m])",
          "legendFormat": "env hits {{worker_id}}"
        },
        {
          "expr": "rate(fetchml_prewarm_env_miss_total[5m])",
          "legendFormat": "env misses {{worker_id}}"
        },
        {
          "expr": "rate(fetchml_prewarm_snapshot_hit_total[5m])",
          "legendFormat": "snapshot hits {{worker_id}}"
        },
        {
          "expr": "rate(fetchml_prewarm_snapshot_miss_total[5m])",
          "legendFormat": "snapshot misses {{worker_id}}"
        }
      ],
      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 20 },
      "yaxes": [
        { "format": "reqps", "logBase": 1, "show": true },
        { "format": "short", "logBase": 1, "show": true }
      ]
    },
    {
      "id": 11,
      "title": "Prewarm Build Time",
      "type": "graph",
      "targets": [
        {
          "expr": "rate(fetchml_prewarm_env_time_seconds_total[5m])",
          "legendFormat": "env build {{worker_id}}"
        },
        {
          "expr": "rate(fetchml_prewarm_snapshot_time_seconds_total[5m])",
          "legendFormat": "snapshot prewarm {{worker_id}}"
        }
      ],
      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 28 },
      "yaxes": [
        { "format": "s", "logBase": 1, "show": true },
        { "format": "short", "logBase": 1, "show": true }
      ]
    },
    {
      "id": 12,
      "title": "Prewarm Builds",
      "type": "graph",
      "targets": [
        {
          "expr": "increase(fetchml_prewarm_env_built_total[1h])",
          "legendFormat": "env built {{worker_id}}"
        },
        {
          "expr": "increase(fetchml_prewarm_snapshot_built_total[1h])",
          "legendFormat": "snapshots prewarmed {{worker_id}}"
        }
      ],
      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 28 },
      "yaxes": [
        { "format": "short", "logBase": 1, "show": true },
        { "format": "short", "logBase": 1, "show": true }
      ]
    }
  ],
  "time": { "from": "now-1h", "to": "now" },
  "refresh": "5s"
}