- from local-path to Longhorn - Grafana: 2Gi (replica=3) - Loki: 10Gi (replica=3) - Alertmanager: 1Gi (replica=3) - Prometheus: 5Gi (replica=3) - Use dedicated 50GB Longhorn storage on each node
55 lines
1.2 KiB
YAML
55 lines
1.2 KiB
YAML
# Alertmanager Helm values.
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager

# Fixed resource name prefix so generated objects are named "alertmanager".
fullnameOverride: alertmanager

# Persistent volume for Alertmanager state.
persistence:
  enabled: true
  size: 1Gi
  storageClass: longhorn  # Migrated from local-path to Longhorn (replica=3)

# Only requests are set; no limits are configured here.
resources:
  requests:
    cpu: 10m
    memory: 32Mi

# Prometheus ServiceMonitor settings.
serviceMonitor:
  enabled: true
  additionalLabels:
    release: prometheus
  # NOTE(review): nesting reconstructed; assumed to be a serviceMonitor-level
  # key (not a label) -- confirm against the chart's values.yaml.
  namespace: monitoring

# Alertmanager configuration.
config:
  global:
    resolve_timeout: 5m

  route:
    group_by: ["alertname", "cluster", "service"]
    group_wait: 10s
    group_interval: 10s
    repeat_interval: 12h
    # Fallback receiver for alerts that match none of the child routes.
    receiver: "default"
    routes:
      # `matchers` replaces the deprecated `match` field
      # (requires Alertmanager >= 0.22).
      - matchers:
          - 'severity = "critical"'
        receiver: "critical"
        # Keep evaluating the sibling routes after this one matches.
        continue: true
      - matchers:
          - 'severity = "warning"'
        receiver: "warning"

  receivers:
    - name: "default"
      # Default receiver: no notification channel is configured
      # (original note: alerts are only logged).
    - name: "critical"
      # TODO: add notification channels such as Slack or email.
      # webhook_configs:
      #   - url: 'http://your-webhook-url'
    - name: "warning"
      # TODO: add a notification channel for warnings.

  inhibit_rules:
    # Mute a warning while a critical alert with the same alertname, cluster
    # and service labels is firing. `source_matchers` / `target_matchers`
    # replace the deprecated `source_match` / `target_match` fields.
    - source_matchers:
        - 'severity = "critical"'
      target_matchers:
        - 'severity = "warning"'
      equal: ["alertname", "cluster", "service"]