FIX(grafana): use kube-state-metrics for OOM detection
- Replace container_oom_events_total with kube_pod_container_status_last_terminated_reason
- Fix OOM events not showing after pod restart
- The cAdvisor metric resets on pod restart, whereas the kube-state-metrics metric persists
This commit is contained in:
@@ -2446,7 +2446,7 @@
|
|||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"exemplar": true,
|
"exemplar": true,
|
||||||
"expr": "sum(increase(container_oom_events_total{cluster=\"$cluster\"}[$__rate_interval])) by (namespace) > 0 or vector(0)",
|
"expr": "count by (namespace) (kube_pod_container_status_last_terminated_reason{cluster=\"$cluster\", reason=\"OOMKilled\"} == 1) or vector(0)",
|
||||||
"interval": "",
|
"interval": "",
|
||||||
"legendFormat": "{{ namespace }}",
|
"legendFormat": "{{ namespace }}",
|
||||||
"range": true,
|
"range": true,
|
||||||
|
|||||||
Reference in New Issue
Block a user