INIT(repo): monitoring stack setup
This commit is contained in:
46
alertmanager/argocd/alertmanager.yaml
Normal file
46
alertmanager/argocd/alertmanager.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
---
# Argo CD Application that installs the standalone Alertmanager Helm chart
# into the `monitoring` namespace, using a values file kept in the
# infrastructure Git repository.
kind: Application
apiVersion: argoproj.io/v1alpha1
metadata:
  namespace: argocd
  name: alertmanager
  finalizers: [resources-finalizer.argocd.argoproj.io]
spec:
  project: default
  revisionHistoryLimit: 10

  destination:
    namespace: monitoring
    server: https://kubernetes.default.svc

  sources:
    # Source 1: the Helm chart, pulled from the upstream chart repository.
    - chart: alertmanager
      repoURL: https://prometheus-community.github.io/helm-charts
      targetRevision: "1.29.0"
      helm:
        valueFiles: [$values/alertmanager/helm-values/alertmanager.yaml]
    # Source 2: the Git repository exposed as `$values` to the chart above.
    - ref: values
      repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main

  syncPolicy:
    automated:
      allowEmpty: false
      prune: true
      selfHeal: true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true
|
||||
54
alertmanager/helm-values/alertmanager.yaml
Normal file
54
alertmanager/helm-values/alertmanager.yaml
Normal file
@@ -0,0 +1,54 @@
|
||||
---
# Alertmanager Helm values
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager

fullnameOverride: alertmanager

persistence:
  enabled: true
  size: 1Gi
  storageClass: local-path

resources:
  requests:
    cpu: 10m
    memory: 32Mi

# ServiceMonitor for Prometheus; labelled `release: prometheus`.
serviceMonitor:
  enabled: true
  additionalLabels:
    release: prometheus
  namespace: monitoring

config:
  global:
    resolve_timeout: 5m
  route:
    group_by: ["alertname", "cluster", "service"]
    group_wait: 10s
    group_interval: 10s
    repeat_interval: 12h
    receiver: "default"
    routes:
      # `matchers` is the current syntax; the `match` field is deprecated.
      - matchers:
          - 'severity = "critical"'
        receiver: "critical"
        continue: true
      - matchers:
          - 'severity = "warning"'
        receiver: "warning"
  receivers:
    # Default receiver: no notification integration is configured yet.
    - name: "default"
    # TODO: add notification channels (Slack, e-mail, ...), for example:
    # webhook_configs:
    #   - url: 'http://your-webhook-url'
    - name: "critical"
    # TODO: add a notification channel for warnings.
    - name: "warning"
  inhibit_rules:
    # A firing critical alert mutes the warning alert that carries the same
    # alertname/cluster/service labels.
    # `source_matchers`/`target_matchers` replace the deprecated
    # `source_match`/`target_match` fields.
    - source_matchers:
        - 'severity = "critical"'
      target_matchers:
        - 'severity = "warning"'
      equal: ["alertname", "cluster", "service"]
|
||||
6
alertmanager/kustomization.yaml
Normal file
6
alertmanager/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# The Argo CD Application resource is managed in
# infrastructure/kustomization.yaml, so it is intentionally not listed here:
#   - argocd/alertmanager.yaml
# Explicit empty list: a bare `resources:` key would parse as null.
resources: []
|
||||
50
grafana/argocd/grafana.yaml
Normal file
50
grafana/argocd/grafana.yaml
Normal file
@@ -0,0 +1,50 @@
|
||||
---
# Argo CD Application that installs the Grafana Helm chart into the
# `monitoring` namespace. It also syncs the `grafana` directory of the
# infrastructure repository (third source below).
kind: Application
apiVersion: argoproj.io/v1alpha1
metadata:
  namespace: argocd
  name: grafana
  finalizers: [resources-finalizer.argocd.argoproj.io]
spec:
  project: default
  revisionHistoryLimit: 10

  destination:
    namespace: monitoring
    server: https://kubernetes.default.svc

  sources:
    # Source 1: the Helm chart, pulled from the upstream chart repository.
    - chart: grafana
      repoURL: https://grafana.github.io/helm-charts
      targetRevision: "10.3.0"
      helm:
        valueFiles: [$values/grafana/helm-values/grafana.yaml]
    # Source 2: the Git repository exposed as `$values` to the chart above.
    - ref: values
      repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main
    # Source 3: Vault-backed secret manifests under the `grafana` path.
    - path: grafana
      repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main

  syncPolicy:
    automated:
      allowEmpty: false
      prune: true
      selfHeal: true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true
|
||||
63
grafana/helm-values/grafana.yaml
Normal file
63
grafana/helm-values/grafana.yaml
Normal file
@@ -0,0 +1,63 @@
|
||||
---
# Helm values for the Grafana release.
# Chart: https://github.com/grafana/helm-charts/tree/main/charts/grafana

fullnameOverride: grafana

admin:
  # NOTE(review): with `existingSecret` set, the user name presumably comes
  # from the secret's `admin-user` key - confirm the chart reads `user` at all.
  user: bluemayne
  # Credentials live in the Secret `grafana-admin-password`, which is created
  # by the ExternalSecret in grafana/vault/grafana-admin-password.yaml.
  existingSecret: grafana-admin-password
  userKey: admin-user
  passwordKey: admin-password

persistence:
  enabled: true
  storageClass: local-path
  size: 2Gi

initChownData:
  enabled: false

podSecurityContext:
  fsGroup: 472
  fsGroupChangePolicy: Always

resources:
  requests:
    cpu: 25m
    memory: 128Mi

service:
  type: ClusterIP
  port: 80

# Provisioned data sources.
datasources:
  datasources.yaml:
    apiVersion: 1
    datasources:
      - name: Prometheus
        type: prometheus
        access: proxy
        url: 'http://prometheus-kube-prometheus-prometheus:9090'
        isDefault: true
        editable: true
      - name: Loki
        type: loki
        access: proxy
        url: 'http://loki.logging.svc.cluster.local:3100'
        editable: true

# Each key below is a section of grafana.ini.
grafana.ini:
  server:
    root_url: 'http://grafana0213.kro.kr'
  security:
    allow_embedding: true
  auth:
    disable_login_form: false
    disable_signout_menu: true
  auth.anonymous:
    enabled: false
  auth.basic:
    enabled: false
  news:
    news_feed_enabled: false
|
||||
7
grafana/kustomization.yaml
Normal file
7
grafana/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1

resources:
  # The Argo CD Application resource is managed in
  # infrastructure/kustomization.yaml, so it stays commented out here.
  # - argocd/grafana.yaml
  - vault/grafana-admin-password.yaml
|
||||
22
grafana/vault/grafana-admin-password.yaml
Normal file
22
grafana/vault/grafana-admin-password.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
---
# ExternalSecret that fills the Secret `grafana-admin-password` from the
# `vault-backend` ClusterSecretStore, refreshed every hour.
kind: ExternalSecret
apiVersion: external-secrets.io/v1beta1
metadata:
  namespace: monitoring
  name: grafana-admin-password
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-backend
    kind: ClusterSecretStore
  target:
    creationPolicy: Owner
    name: grafana-admin-password
  data:
    # Secret key `admin-user` <- property ADMIN_USER of monitoring/grafana.
    - secretKey: admin-user
      remoteRef:
        property: ADMIN_USER
        key: monitoring/grafana
    # Secret key `admin-password` <- property ADMIN_PASSWORD of monitoring/grafana.
    - secretKey: admin-password
      remoteRef:
        property: ADMIN_PASSWORD
        key: monitoring/grafana
|
||||
46
kube-state-metrics/argocd/kube-state-metrics.yaml
Normal file
46
kube-state-metrics/argocd/kube-state-metrics.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
---
# Argo CD Application that installs the kube-state-metrics Helm chart into
# the `monitoring` namespace, using a values file kept in the infrastructure
# Git repository.
kind: Application
apiVersion: argoproj.io/v1alpha1
metadata:
  namespace: argocd
  name: kube-state-metrics
  finalizers: [resources-finalizer.argocd.argoproj.io]
spec:
  project: default
  revisionHistoryLimit: 10

  destination:
    namespace: monitoring
    server: https://kubernetes.default.svc

  sources:
    # Source 1: the Helm chart, pulled from the upstream chart repository.
    - chart: kube-state-metrics
      repoURL: https://prometheus-community.github.io/helm-charts
      targetRevision: "5.25.1"
      helm:
        valueFiles:
          - $values/kube-state-metrics/helm-values/kube-state-metrics.yaml
    # Source 2: the Git repository exposed as `$values` to the chart above.
    - ref: values
      repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main

  syncPolicy:
    automated:
      allowEmpty: false
      prune: true
      selfHeal: true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true
|
||||
24
kube-state-metrics/helm-values/kube-state-metrics.yaml
Normal file
24
kube-state-metrics/helm-values/kube-state-metrics.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
---
# Helm values for the kube-state-metrics release.
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics

fullnameOverride: kube-state-metrics

service:
  type: ClusterIP
  clusterIP: None

resources:
  requests:
    memory: 64Mi
    cpu: 10m

# ServiceMonitor settings for Prometheus.
prometheus:
  monitor:
    enabled: true
    namespace: monitoring
    additionalLabels:
      release: prometheus
    # Stamp every scraped series with a static `cluster` label.
    relabelings:
      - targetLabel: cluster
        replacement: 'mayne-cluster'
|
||||
6
kube-state-metrics/kustomization.yaml
Normal file
6
kube-state-metrics/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# The Argo CD Application resource is managed in
# infrastructure/kustomization.yaml, so it is intentionally not listed here:
#   - argocd/kube-state-metrics.yaml
# Explicit empty list: a bare `resources:` key would parse as null.
resources: []
|
||||
26
loki/argocd/loki.yaml
Normal file
26
loki/argocd/loki.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
---
# Argo CD Application that installs the Loki Helm chart into the `logging`
# namespace, using a values file kept in the infrastructure Git repository.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: loki
  namespace: argocd
spec:
  project: default
  sources:
    # Source 1: the Helm chart, pulled from the upstream chart repository.
    - repoURL: https://grafana.github.io/helm-charts
      chart: loki
      targetRevision: 6.24.0
      helm:
        valueFiles:
          - $values/loki/helm-values/loki.yaml
    # Source 2: the Git repository exposed as `$values` to the chart above.
    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      # Pinned to `main` (was the floating `HEAD`) so the values track the
      # same branch as the monitoring Applications in this repository.
      targetRevision: main
      ref: values
  destination:
    server: https://kubernetes.default.svc
    namespace: logging
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
    # Retry failed syncs with exponential backoff, matching the retry policy
    # of the monitoring Applications.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
|
||||
78
loki/helm-values/loki.yaml
Normal file
78
loki/helm-values/loki.yaml
Normal file
@@ -0,0 +1,78 @@
|
||||
---
# Loki Helm values
# Chart: https://grafana.github.io/helm-charts
# Simple single-binary deployment backed by the local filesystem.

loki:
  # Auth disabled for simplicity.
  auth_enabled: false

  # Filesystem storage (simple setup).
  storage:
    type: filesystem

  # Single replica, so no replication.
  commonConfig:
    replication_factor: 1

  # Schema config
  schemaConfig:
    configs:
      - from: "2024-01-01"
        store: tsdb
        object_store: filesystem
        schema: v13
        index:
          prefix: index_
          period: 24h

  # Limits
  limits_config:
    retention_period: 168h  # 7 days
    ingestion_rate_mb: 10
    ingestion_burst_size_mb: 20
    max_streams_per_user: 10000

  # `retention_period` above is only enforced when the compactor runs with
  # retention enabled; without this section old chunks are never deleted.
  # `delete_request_store` must be set whenever retention is enabled.
  compactor:
    retention_enabled: true
    delete_request_store: filesystem

# Single-binary deployment mode.
deploymentMode: SingleBinary

singleBinary:
  replicas: 1
  persistence:
    enabled: true
    size: 10Gi
    storageClass: local-path
  resources:
    requests:
      cpu: 100m
      memory: 256Mi

# Components that are not used in single-binary mode.
backend:
  replicas: 0
read:
  replicas: 0
write:
  replicas: 0

# Gateway disabled (clients talk to Loki directly).
gateway:
  enabled: false

# Caches disabled (simple mode).
chunksCache:
  enabled: false
resultsCache:
  enabled: false

# Self-monitoring disabled.
monitoring:
  selfMonitoring:
    enabled: false
    grafanaAgent:
      installOperator: false

# NOTE(review): chart 6.x reads the canary toggle from the top-level
# `lokiCanary` key (it lived under `monitoring` in 5.x) - confirm against the
# values.yaml of the pinned chart version.
lokiCanary:
  enabled: false

# Helm test disabled.
test:
  enabled: false
|
||||
6
loki/kustomization.yaml
Normal file
6
loki/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# The Argo CD Application resource is managed in
# infrastructure/kustomization.yaml, so it is intentionally not listed here:
#   - argocd/loki.yaml
# Explicit empty list: a bare `resources:` key would parse as null.
resources: []
|
||||
46
node-exporter/argocd/node-exporter.yaml
Normal file
46
node-exporter/argocd/node-exporter.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
---
# Argo CD Application that installs the prometheus-node-exporter Helm chart
# into the `monitoring` namespace, using a values file kept in the
# infrastructure Git repository.
kind: Application
apiVersion: argoproj.io/v1alpha1
metadata:
  namespace: argocd
  name: node-exporter
  finalizers: [resources-finalizer.argocd.argoproj.io]
spec:
  project: default
  revisionHistoryLimit: 10

  destination:
    namespace: monitoring
    server: https://kubernetes.default.svc

  sources:
    # Source 1: the Helm chart, pulled from the upstream chart repository.
    - chart: prometheus-node-exporter
      repoURL: https://prometheus-community.github.io/helm-charts
      targetRevision: "4.39.0"
      helm:
        valueFiles: [$values/node-exporter/helm-values/node-exporter.yaml]
    # Source 2: the Git repository exposed as `$values` to the chart above.
    - ref: values
      repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main

  syncPolicy:
    automated:
      allowEmpty: false
      prune: true
      selfHeal: true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true
|
||||
33
node-exporter/helm-values/node-exporter.yaml
Normal file
33
node-exporter/helm-values/node-exporter.yaml
Normal file
@@ -0,0 +1,33 @@
|
||||
---
# Helm values for the Prometheus node-exporter release.
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter

fullnameOverride: node-exporter

hostPID: true
hostNetwork: true

# Tolerate every NoSchedule taint.
tolerations:
  - operator: Exists
    effect: NoSchedule

service:
  type: ClusterIP
  clusterIP: None

resources:
  requests:
    memory: 50Mi
    cpu: 10m

# ServiceMonitor settings for Prometheus.
prometheus:
  monitor:
    enabled: true
    namespace: monitoring
    additionalLabels:
      release: prometheus
    attachMetadata:
      node: true
    # Stamp every scraped series with a static `cluster` label.
    relabelings:
      - targetLabel: cluster
        replacement: 'mayne-cluster'
|
||||
6
node-exporter/kustomization.yaml
Normal file
6
node-exporter/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# The Argo CD Application resource is managed in
# infrastructure/kustomization.yaml, so it is intentionally not listed here:
#   - argocd/node-exporter.yaml
# Explicit empty list: a bare `resources:` key would parse as null.
resources: []
|
||||
50
prometheus/argocd/prometheus.yaml
Normal file
50
prometheus/argocd/prometheus.yaml
Normal file
@@ -0,0 +1,50 @@
|
||||
---
# Argo CD Application that installs the kube-prometheus-stack Helm chart
# into the `monitoring` namespace. It also syncs the `prometheus` directory
# of the infrastructure repository (third source below).
kind: Application
apiVersion: argoproj.io/v1alpha1
metadata:
  namespace: argocd
  name: prometheus
  finalizers: [resources-finalizer.argocd.argoproj.io]
spec:
  project: default
  revisionHistoryLimit: 10

  destination:
    namespace: monitoring
    server: https://kubernetes.default.svc

  sources:
    # Source 1: the Helm chart, pulled from the upstream chart repository.
    - chart: kube-prometheus-stack
      repoURL: https://prometheus-community.github.io/helm-charts
      targetRevision: "80.0.0"
      helm:
        valueFiles:
          - $values/prometheus/helm-values/kube-prometheus-stack.yaml
    # Source 2: the Git repository exposed as `$values` to the chart above.
    - ref: values
      repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main
    # Source 3: Vault-backed secret manifests under the `prometheus` path.
    - path: prometheus
      repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      targetRevision: main

  syncPolicy:
    automated:
      allowEmpty: false
      prune: true
      selfHeal: true
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true
|
||||
191
prometheus/helm-values/kube-prometheus-stack.yaml
Normal file
191
prometheus/helm-values/kube-prometheus-stack.yaml
Normal file
@@ -0,0 +1,191 @@
|
||||
---
# Kube-Prometheus-Stack Helm values
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
# Includes: Prometheus Operator, Prometheus, Alertmanager, Grafana, and various exporters

# Global settings
fullnameOverride: ""

# CRD management
crds:
  # CRDs are installed manually to avoid the annotation size problem.
  enabled: false

# Prometheus Operator
prometheusOperator:
  enabled: true
  # Disable CRD creation.
  createCustomResource: false

# Kubelet ServiceMonitor with a static `cluster` label on every endpoint.
# NOTE(review): setting these lists presumably replaces the chart's default
# relabelings (which add `metrics_path`) - confirm the default rules still match.
kubelet:
  enabled: true
  serviceMonitor:
    # cAdvisor metrics (e.g. container_memory_working_set_bytes)
    cAdvisorRelabelings:
      - targetLabel: cluster
        replacement: "mayne-cluster"
    # Resource metrics
    resourceRelabelings:
      - targetLabel: cluster
        replacement: "mayne-cluster"
    # Probes metrics
    probesRelabelings:
      - targetLabel: cluster
        replacement: "mayne-cluster"

# Prometheus
# All Prometheus settings live in this single mapping. The file previously
# declared the top-level `prometheus` key twice; YAML mappings must not repeat
# a key, and last-wins parsers silently dropped the first block (retention,
# storage, resources, selectors, alerting endpoints).
prometheus:
  enabled: true

  prometheusSpec:
    scrapeInterval: 30s
    evaluationInterval: 30s
    retention: 7d

    # External labels attached to this Prometheus instance.
    externalLabels:
      cluster: "mayne-cluster"

    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 5Gi

    resources:
      requests:
        cpu: 50m
        memory: 256Mi

    # Monitor auto-discovery: select every ServiceMonitor / PodMonitor instead
    # of only the ones carrying this Helm release's labels.
    serviceMonitorSelectorNilUsesHelmValues: false
    serviceMonitorSelector: {}
    podMonitorSelectorNilUsesHelmValues: false
    podMonitorSelector: {}
    probeSelectorNilUsesHelmValues: false
    ruleSelector: {}

    # Alertmanager endpoint (the separately deployed Alertmanager).
    # NOTE(review): confirm the `alertmanager` Service in `monitoring` really
    # exposes a port named `http-web`; the name comes from the target chart.
    alertingEndpoints:
      - name: alertmanager
        namespace: monitoring
        port: http-web
        scheme: http

    # Additional scrape configs for existing services.
    additionalScrapeConfigs:
      # ArgoCD metrics
      - job_name: 'argocd-metrics'
        static_configs:
          - targets:
              - 'argocd-metrics.argocd.svc.cluster.local:8082'
            labels:
              service: argocd-controller
          - targets:
              - 'argocd-server-metrics.argocd.svc.cluster.local:8083'
            labels:
              service: argocd-server
          - targets:
              - 'argocd-repo-server.argocd.svc.cluster.local:8084'
            labels:
              service: argocd-repo

      # Cert-Manager
      - job_name: 'cert-manager'
        static_configs:
          - targets:
              - 'cert-manager.cert-manager.svc.cluster.local:9402'

      # MinIO
      - job_name: 'minio-cluster'
        static_configs:
          - targets:
              - 'minio.minio.svc.cluster.local:9000'
        metrics_path: /minio/v2/metrics/cluster
        scheme: http

      - job_name: 'minio-node'
        static_configs:
          - targets:
              - 'minio.minio.svc.cluster.local:9000'
        metrics_path: /minio/v2/metrics/node
        scheme: http

      # Ingress NGINX
      - job_name: 'ingress-nginx'
        kubernetes_sd_configs:
          - role: pod
            namespaces:
              names:
                - ingress-nginx
        relabel_configs:
          # Keep only the ingress-nginx controller pods.
          - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
            action: keep
            regex: ingress-nginx
          - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component]
            action: keep
            regex: controller
          # Rewrite the scrape address to port 10254.
          - source_labels: [__address__]
            action: replace
            regex: '([^:]+)(?::\d+)?'
            replacement: '$1:10254'
            target_label: __address__
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: pod
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: namespace

# Alertmanager (the existing, separately deployed alertmanager is used)
alertmanager:
  enabled: false

# Grafana (the existing, separately deployed grafana is used)
grafana:
  enabled: false

# Node Exporter (the existing, separately deployed node-exporter is used)
nodeExporter:
  enabled: false

# Kube State Metrics (the existing kube-state-metrics is used);
# the separately deployed kube-state-metrics creates its own ServiceMonitor.
kubeStateMetrics:
  enabled: false

# Default alerting / recording rule groups.
defaultRules:
  create: true
  rules:
    alertmanager: true
    etcd: false
    configReloaders: true
    general: true
    k8s: true
    kubeApiserverAvailability: true
    kubeApiserverBurnrate: true
    kubeApiserverHistogram: true
    kubeApiserverSlos: true
    kubeControllerManager: false
    kubelet: true
    kubeProxy: false
    kubePrometheusGeneral: true
    kubePrometheusNodeRecording: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    kubeSchedulerAlerting: false
    kubeSchedulerRecording: false
    kubeStateMetrics: true
    network: true
    node: true
    nodeExporterAlerting: true
    nodeExporterRecording: true
    prometheus: true
    prometheusOperator: true
|
||||
7
prometheus/kustomization.yaml
Normal file
7
prometheus/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1

resources:
  # The Argo CD Application resource is managed in
  # infrastructure/kustomization.yaml, so it stays commented out here.
  # - argocd/prometheus.yaml
  - vault/postgresql-password.yaml
|
||||
18
prometheus/vault/postgresql-password.yaml
Normal file
18
prometheus/vault/postgresql-password.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
---
# ExternalSecret that fills the Secret `postgresql-password` from the
# `vault-backend` ClusterSecretStore, refreshed every hour.
kind: ExternalSecret
apiVersion: external-secrets.io/v1beta1
metadata:
  namespace: monitoring
  name: postgresql-password
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: vault-backend
    kind: ClusterSecretStore
  target:
    creationPolicy: Owner
    name: postgresql-password
  data:
    # Secret key `password` <- property PASSWORD of monitoring/postgres.
    - secretKey: password
      remoteRef:
        property: PASSWORD
        key: monitoring/postgres
|
||||
26
promtail/argocd/promtail.yaml
Normal file
26
promtail/argocd/promtail.yaml
Normal file
@@ -0,0 +1,26 @@
|
||||
---
# Argo CD Application that installs the Promtail Helm chart into the
# `logging` namespace, using a values file kept in the infrastructure Git
# repository.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: promtail
  namespace: argocd
spec:
  project: default
  sources:
    # Source 1: the Helm chart, pulled from the upstream chart repository.
    - repoURL: https://grafana.github.io/helm-charts
      chart: promtail
      targetRevision: 6.16.6
      helm:
        valueFiles:
          - $values/promtail/helm-values/promtail.yaml
    # Source 2: the Git repository exposed as `$values` to the chart above.
    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
      # Pinned to `main` (was the floating `HEAD`) so the values track the
      # same branch as the monitoring Applications in this repository.
      targetRevision: main
      ref: values
  destination:
    server: https://kubernetes.default.svc
    namespace: logging
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      - CreateNamespace=true
    # Retry failed syncs with exponential backoff, matching the retry policy
    # of the monitoring Applications.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m
|
||||
49
promtail/helm-values/promtail.yaml
Normal file
49
promtail/helm-values/promtail.yaml
Normal file
@@ -0,0 +1,49 @@
|
||||
---
# Helm values for the Promtail release.
# Chart: https://grafana.github.io/helm-charts
# Log collector agent (DaemonSet on all nodes)

# Where collected logs are pushed (Loki endpoint).
config:
  clients:
    - url: 'http://loki.logging.svc.cluster.local:3100/loki/api/v1/push'

# Host paths made available to the agent.
defaultVolumes:
  - name: run
    hostPath: {path: /run/promtail}
  - name: containers
    hostPath: {path: /var/lib/docker/containers}
  - name: pods
    hostPath: {path: /var/log/pods}

# Mount points for the volumes above; the log directories are read-only.
defaultVolumeMounts:
  - name: run
    mountPath: /run/promtail
  - name: containers
    readOnly: true
    mountPath: /var/lib/docker/containers
  - name: pods
    readOnly: true
    mountPath: /var/log/pods

# Resource requests.
resources:
  requests:
    memory: 64Mi
    cpu: 50m

# Tolerate the master / control-plane taints so the agent also runs there.
tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
    operator: Exists
  - key: node-role.kubernetes.io/control-plane
    effect: NoSchedule
    operator: Exists

# No ServiceMonitor for Promtail.
serviceMonitor:
  enabled: false
|
||||
6
promtail/kustomization.yaml
Normal file
6
promtail/kustomization.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
---
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# The Argo CD Application resource is managed in
# infrastructure/kustomization.yaml, so it is intentionally not listed here:
#   - argocd/promtail.yaml
# Explicit empty list: a bare `resources:` key would parse as null.
resources: []
|
||||
Reference in New Issue
Block a user