INIT(repo): monitoring stack setup
This commit is contained in:
50
prometheus/argocd/prometheus.yaml
Normal file
50
prometheus/argocd/prometheus.yaml
Normal file
@@ -0,0 +1,50 @@
|
||||
---
# Argo CD Application that installs the kube-prometheus-stack Helm chart into
# the `monitoring` namespace, using a values file and an extra manifest
# directory that both live in the infrastructure Git repository.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: prometheus
  namespace: argocd
  finalizers:
    - resources-finalizer.argocd.argoproj.io
spec:
  project: default

  sources:
    # 1) Helm chart pulled from the upstream chart repository.
    - repoURL: 'https://prometheus-community.github.io/helm-charts'
      chart: kube-prometheus-stack
      targetRevision: '80.0.0'
      helm:
        valueFiles:
          # `$values` points at the source below that declares `ref: values`.
          - '$values/prometheus/helm-values/kube-prometheus-stack.yaml'
    # 2) Git repository that only supplies the Helm values file.
    - repoURL: 'https://gitea0213.kro.kr/bluemayne/infrastructure.git'
      targetRevision: main
      ref: values
    # 3) Same Git repository, `prometheus/` directory (Vault-backed secrets).
    - repoURL: 'https://gitea0213.kro.kr/bluemayne/infrastructure.git'
      targetRevision: main
      path: prometheus

  destination:
    server: 'https://kubernetes.default.svc'
    namespace: monitoring

  syncPolicy:
    automated:
      prune: true
      selfHeal: true
      allowEmpty: false

    syncOptions:
      - CreateNamespace=true
      - PrunePropagationPolicy=foreground
      - PruneLast=true

    # Retry failed syncs up to 5 times: 5s initial backoff, doubling each
    # attempt, capped at 3m.
    retry:
      limit: 5
      backoff:
        duration: 5s
        factor: 2
        maxDuration: 3m

  revisionHistoryLimit: 10
|
||||
191
prometheus/helm-values/kube-prometheus-stack.yaml
Normal file
191
prometheus/helm-values/kube-prometheus-stack.yaml
Normal file
@@ -0,0 +1,191 @@
|
||||
---
# Helm values for kube-prometheus-stack
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
# Bundles: Prometheus Operator, Prometheus, Alertmanager, Grafana, and various exporters

# Global settings
fullnameOverride: ''

# CRD management
# CRDs are installed manually to avoid the annotation size problem.
crds:
  enabled: false

# Prometheus Operator
prometheusOperator:
  enabled: true
  # Disable CRD creation.
  # NOTE(review): `createCustomResource` looks like a legacy key from the old
  # prometheus-operator chart - confirm the pinned chart version still reads it.
  createCustomResource: false
|
||||
|
||||
# Kubelet ServiceMonitor with a static `cluster` label on each endpoint.
#
# Helm replaces list values wholesale, so setting any of the *Relabelings
# lists below discards the chart's default entry for that list. The chart
# default copies `__metrics_path__` into a `metrics_path` label, and the
# bundled kubelet / k8s rule groups (enabled under `defaultRules.rules`)
# select on that label. The default entry is therefore restated next to the
# `cluster` relabeling in every list.
kubelet:
  enabled: true
  serviceMonitor:
    # cAdvisor metrics (container_memory_working_set_bytes, etc.)
    cAdvisorRelabelings:
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
      - targetLabel: cluster
        replacement: "mayne-cluster"
    # Resource metrics
    resourceRelabelings:
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
      - targetLabel: cluster
        replacement: "mayne-cluster"
    # Probes metrics
    probesRelabelings:
      - action: replace
        sourceLabels: [__metrics_path__]
        targetLabel: metrics_path
      - targetLabel: cluster
        replacement: "mayne-cluster"
|
||||
|
||||
# Prometheus
# Every `prometheus.*` setting lives under this single key. The file used to
# define `prometheus:` twice; duplicate mapping keys are invalid YAML and
# loaders that tolerate them keep only the last occurrence, which would drop
# retention, storage, resources, selectors and alerting endpoints.
prometheus:
  enabled: true

  prometheusSpec:
    scrapeInterval: 30s
    evaluationInterval: 30s
    retention: 7d

    # External labels identifying this cluster
    externalLabels:
      cluster: "mayne-cluster"

    storageSpec:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes: ["ReadWriteOnce"]
          resources:
            requests:
              storage: 5Gi

    resources:
      requests:
        cpu: 50m
        memory: 256Mi

    # ServiceMonitor auto-discovery - select all ServiceMonitors
    # (same for PodMonitors and Probes).
    serviceMonitorSelectorNilUsesHelmValues: false
    serviceMonitorSelector: {}
    podMonitorSelectorNilUsesHelmValues: false
    podMonitorSelector: {}
    probeSelectorNilUsesHelmValues: false
    # NOTE(review): unlike the selectors above, there is no
    # `ruleSelectorNilUsesHelmValues: false` here - confirm whether all
    # PrometheusRules were meant to be selected as well.
    ruleSelector: {}

    # Alertmanager settings: send alerts to the existing Alertmanager service.
    alertingEndpoints:
      - name: alertmanager
        namespace: monitoring
        port: http-web
        scheme: http

    # Additional scrape configs for existing services
    additionalScrapeConfigs:
      # ArgoCD metrics
      - job_name: 'argocd-metrics'
        static_configs:
          - targets:
              - 'argocd-metrics.argocd.svc.cluster.local:8082'
            labels:
              service: argocd-controller
          - targets:
              - 'argocd-server-metrics.argocd.svc.cluster.local:8083'
            labels:
              service: argocd-server
          - targets:
              - 'argocd-repo-server.argocd.svc.cluster.local:8084'
            labels:
              service: argocd-repo

      # Cert-Manager
      - job_name: 'cert-manager'
        static_configs:
          - targets:
              - 'cert-manager.cert-manager.svc.cluster.local:9402'

      # MinIO
      - job_name: 'minio-cluster'
        static_configs:
          - targets:
              - 'minio.minio.svc.cluster.local:9000'
        metrics_path: /minio/v2/metrics/cluster
        scheme: http

      - job_name: 'minio-node'
        static_configs:
          - targets:
              - 'minio.minio.svc.cluster.local:9000'
        metrics_path: /minio/v2/metrics/node
        scheme: http

      # Ingress NGINX
      - job_name: 'ingress-nginx'
        kubernetes_sd_configs:
          - role: pod
            namespaces:
              names:
                - ingress-nginx
        relabel_configs:
          # Keep only pods labelled as the ingress-nginx controller.
          - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
            action: keep
            regex: ingress-nginx
          - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_component]
            action: keep
            regex: controller
          # Rewrite the scrape address to port 10254, whatever port was discovered.
          - source_labels: [__address__]
            action: replace
            regex: '([^:]+)(?::\d+)?'
            replacement: '$1:10254'
            target_label: __address__
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: pod
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: namespace

# Alertmanager (use the existing alertmanager)
alertmanager:
  enabled: false

# Grafana (use the existing grafana)
grafana:
  enabled: false

# Node Exporter (use the existing node-exporter)
nodeExporter:
  enabled: false

# Kube State Metrics (use the existing kube-state-metrics)
# The separately deployed kube-state-metrics creates its own ServiceMonitor.
kubeStateMetrics:
  enabled: false

# Default rule groups shipped with the chart (alerting and recording rules);
# each flag below turns one group on or off.
defaultRules:
  create: true
  rules:
    alertmanager: true
    etcd: false
    configReloaders: true
    general: true
    k8s: true
    kubeApiserverAvailability: true
    kubeApiserverBurnrate: true
    kubeApiserverHistogram: true
    kubeApiserverSlos: true
    kubeControllerManager: false
    kubelet: true
    kubeProxy: false
    kubePrometheusGeneral: true
    kubePrometheusNodeRecording: true
    kubernetesApps: true
    kubernetesResources: true
    kubernetesStorage: true
    kubernetesSystem: true
    kubeSchedulerAlerting: false
    kubeSchedulerRecording: false
    kubeStateMetrics: true
    network: true
    node: true
    nodeExporterAlerting: true
    nodeExporterRecording: true
    prometheus: true
    prometheusOperator: true
|
||||
7
prometheus/kustomization.yaml
Normal file
7
prometheus/kustomization.yaml
Normal file
@@ -0,0 +1,7 @@
|
||||
---
# Kustomize entry point for the `prometheus/` directory.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  # The Argo CD Application resource is managed in
  # infrastructure/kustomization.yaml, so it stays commented out here.
  # - argocd/prometheus.yaml
  - vault/postgresql-password.yaml
|
||||
18
prometheus/vault/postgresql-password.yaml
Normal file
18
prometheus/vault/postgresql-password.yaml
Normal file
@@ -0,0 +1,18 @@
|
||||
---
# ExternalSecret that creates the `postgresql-password` Secret in the
# `monitoring` namespace from the `vault-backend` ClusterSecretStore.
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
  name: postgresql-password
  namespace: monitoring
spec:
  # Re-read the remote value every hour.
  refreshInterval: 1h
  secretStoreRef:
    kind: ClusterSecretStore
    name: vault-backend
  target:
    name: postgresql-password
    creationPolicy: Owner
  data:
    # Secret key `password` <- property `PASSWORD` of `monitoring/postgres`.
    - secretKey: password
      remoteRef:
        key: monitoring/postgres
        property: PASSWORD
|
||||
Reference in New Issue
Block a user