FIX(grafana): use kube-state-metrics for OOM detection

- Replace container_oom_events_total with kube_pod_container_status_last_terminated_reason
- Fix OOM events not showing after pod restart
- The cAdvisor metric resets on pod restart, whereas the kube-state-metrics metric persists
This commit is contained in:
2026-01-09 15:04:24 +09:00
parent 14bd244b98
commit 539f4be497

View File

@@ -2446,7 +2446,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "sum(increase(container_oom_events_total{cluster=\"$cluster\"}[$__rate_interval])) by (namespace) > 0 or vector(0)", "expr": "count by (namespace) (kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", reason=\"OOMKilled\"} == 1) or vector(0)",
"interval": "", "interval": "",
"legendFormat": "{{ namespace }}", "legendFormat": "{{ namespace }}",
"range": true, "range": true,