From bb8b1c193e59f6c0a28c00b82281300115c2c8fb Mon Sep 17 00:00:00 2001
From: Mayne0213
Date: Fri, 9 Jan 2026 15:13:44 +0900
Subject: [PATCH] FIX(alertmanager): improve OOMKilled alert detection

- Only fire when container restarted in last 10 minutes
- Prevent stale alerts from old OOM events
---
 alertmanager/manifests/oom-alert-rule.yaml | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/alertmanager/manifests/oom-alert-rule.yaml b/alertmanager/manifests/oom-alert-rule.yaml
index 04e840e..1b56245 100644
--- a/alertmanager/manifests/oom-alert-rule.yaml
+++ b/alertmanager/manifests/oom-alert-rule.yaml
@@ -12,10 +12,16 @@ spec:
     rules:
     - alert: KubeContainerOOMKilled
       annotations:
-        description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} was OOMKilled."
-        summary: "Container was OOMKilled"
+        description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} was OOMKilled in the last 10 minutes."
+        summary: "Container was recently OOMKilled"
       expr: |
-        kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1
+        (
+          increase(kube_pod_container_status_restarts_total[10m]) > 0
+        )
+        and on (namespace, pod, container)
+        (
+          kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1
+        )
       for: 0m
       labels:
         severity: warning