- Only fire when container restarted in last 10 minutes - Prevent stale alerts from old OOM events
28 lines
779 B
YAML
28 lines
779 B
YAML
---
# PrometheusRule defining a single alert for containers that were recently
# OOMKilled.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: oom-alerts
  namespace: prometheus
  labels:
    # NOTE(review): presumably these labels are what the Prometheus instance's
    # rule selector matches on -- confirm against the Prometheus resource.
    app: kube-prometheus-stack
    release: prometheus
spec:
  groups:
    - name: oom.rules
      rules:
        - alert: KubeContainerOOMKilled
          annotations:
            description: "Container {{ $labels.container }} in pod {{ $labels.namespace }}/{{ $labels.pod }} was OOMKilled in the last 10 minutes."
            summary: "Container was recently OOMKilled"
          # Two conditions, joined per (namespace, pod, container):
          #   1. the container's restart counter increased within the last
          #      10 minutes, and
          #   2. the container's last termination reason is OOMKilled.
          # Condition 1 keeps the alert from firing on stale OOM events: the
          # last-terminated reason alone stays set long after the kill.
          expr: |
            (
              increase(kube_pod_container_status_restarts_total[10m]) > 0
            )
            and on (namespace, pod, container)
            (
              kube_pod_container_status_last_terminated_reason{reason="OOMKilled"} == 1
            )
          # No pending period: fire on the first evaluation where expr matches.
          for: 0m
          labels:
            severity: warning