Files
observability/alertmanager/manifests/oom-alert-rule.yaml
Mayne0213 bb8b1c193e FIX(alertmanager): improve OOMKilled alert detection
- Only fire when container restarted in last 10 minutes
- Prevent stale alerts from old OOM events
2026-01-09 21:42:35 +09:00

28 lines
779 B
YAML

---
# PrometheusRule that alerts when a container has recently restarted and its
# last termination reason is OOMKilled.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: oom-alerts
  namespace: prometheus
  # NOTE(review): these labels presumably have to match the ruleSelector of
  # the Prometheus instance that should load this rule — confirm against the
  # kube-prometheus-stack release values.
  labels:
    app: kube-prometheus-stack
    release: prometheus
spec:
  groups:
    - name: oom.rules
      rules:
        - alert: KubeContainerOOMKilled
          annotations:
            description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} was OOMKilled in the last 10 minutes."
            summary: "Container was recently OOMKilled"
          # Both conditions must hold for the same namespace/pod/container:
          #   1. the restart counter increased within the last 10 minutes, and
          #   2. the last recorded termination reason is OOMKilled.
          # Requiring a recent restart keeps an old OOM event from alerting
          # indefinitely, since the last-terminated-reason series stays at 1
          # until the container terminates for a different reason.
          expr: |
            (
              increase(kube_pod_container_status_restarts_total[10m]) > 0
            )
            and on (namespace, pod, container)
            (
              kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1
            )
          # No pending period: the alert fires on the first matching evaluation.
          for: 0m
          labels:
            severity: warning