INIT(repo): monitoring stack setup
This commit is contained in:
46
alertmanager/argocd/alertmanager.yaml
Normal file
46
alertmanager/argocd/alertmanager.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
# Argo CD Application for the Alertmanager Helm chart.
# Two sources are combined: the chart itself, and a Git repository that is
# exposed under the name `values` so the chart source can reference a values
# file from it via the `$values/` prefix.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: alertmanager
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default
  revisionHistoryLimit: 10

  destination:
    namespace: monitoring
    server: 'https://kubernetes.default.svc'

  sources:
    # Helm chart from the external chart repository
    - chart: alertmanager
      repoURL: 'https://prometheus-community.github.io/helm-charts'
      targetRevision: '1.29.0'
      helm:
        valueFiles:
          - '$values/alertmanager/helm-values/alertmanager.yaml'
    # Git repository that holds the values file (referenced above as $values)
    - ref: values
      repoURL: 'https://gitea0213.kro.kr/bluemayne/infrastructure.git'
      targetRevision: main

  syncPolicy:
    automated:
      allowEmpty: false
      prune: true
      selfHeal: true

    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true

    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
|
||||
54
alertmanager/helm-values/alertmanager.yaml
Normal file
54
alertmanager/helm-values/alertmanager.yaml
Normal file
@@ -0,0 +1,54 @@
|
||||
# Helm values for Alertmanager.
# Chart source:
#   https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager

# Fixed name override passed to the chart.
fullnameOverride: 'alertmanager'
|
||||
|
||||
# Persistent storage settings: enabled, 1Gi, using the `local-path` storage class.
persistence:
  enabled: true
  storageClass: 'local-path'
  size: '1Gi'
|
||||
|
||||
# Resource requests only; no limits are declared in this file.
resources:
  requests:
    memory: '32Mi'
    cpu: '10m'
|
||||
|
||||
# Prometheus ServiceMonitor settings.
# NOTE(review): the original indentation was lost in extraction. `namespace`
# is placed here as a sibling of `additionalLabels` (not as a label) — confirm
# against the original file.
serviceMonitor:
  enabled: true
  namespace: monitoring
  additionalLabels:
    release: prometheus
|
||||
|
||||
# Alertmanager configuration.
# Routing and inhibition use the `matchers` / `source_matchers` /
# `target_matchers` fields instead of the deprecated `match` /
# `source_match` / `target_match` forms; the selected alerts are unchanged.
config:
  global:
    resolve_timeout: 5m

  route:
    # Fallback receiver for alerts that match no child route.
    receiver: 'default'
    group_by: ['alertname', 'cluster', 'service']
    group_wait: 10s
    group_interval: 10s
    repeat_interval: 12h
    routes:
      - matchers:
          - 'severity="critical"'
        receiver: 'critical'
        # Keep evaluating the sibling routes after this one matches.
        continue: true
      - matchers:
          - 'severity="warning"'
        receiver: 'warning'

  receivers:
    # Default receiver with no notification integrations configured
    # (original note: log only).
    - name: 'default'
    - name: 'critical'
      # TODO: add notification channels such as Slack or email
      # webhook_configs:
      #   - url: 'http://your-webhook-url'
    - name: 'warning'
      # TODO: add a notification channel for warnings

  inhibit_rules:
    # A critical alert inhibits a warning alert when both carry the same
    # alertname, cluster and service labels.
    - source_matchers:
        - 'severity="critical"'
      target_matchers:
        - 'severity="warning"'
      equal: ['alertname', 'cluster', 'service']
|
||||
6
alertmanager/kustomization.yaml
Normal file
6
alertmanager/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# The ArgoCD Application resource is managed from
# infrastructure/kustomization.yaml, so it is intentionally not listed here:
#   - argocd/alertmanager.yaml
# An explicit empty list is used because a bare `resources:` key parses as
# null rather than as an empty sequence.
resources: []
|
||||
Reference in New Issue
Block a user