From baee94b69d1f107d247e0ac7a0a671c5e412d4f7 Mon Sep 17 00:00:00 2001 From: Mayne0213 Date: Wed, 17 Dec 2025 15:06:58 +0900 Subject: [PATCH] INIT(repo): monitoring stack setup --- alertmanager/argocd/alertmanager.yaml | 46 +++++ alertmanager/helm-values/alertmanager.yaml | 54 +++++ alertmanager/kustomization.yaml | 6 + grafana/argocd/grafana.yaml | 50 +++++ grafana/helm-values/grafana.yaml | 63 ++++++ grafana/kustomization.yaml | 7 + grafana/vault/grafana-admin-password.yaml | 22 ++ .../argocd/kube-state-metrics.yaml | 46 +++++ .../helm-values/kube-state-metrics.yaml | 24 +++ kube-state-metrics/kustomization.yaml | 6 + loki/argocd/loki.yaml | 26 +++ loki/helm-values/loki.yaml | 78 +++++++ loki/kustomization.yaml | 6 + node-exporter/argocd/node-exporter.yaml | 46 +++++ node-exporter/helm-values/node-exporter.yaml | 33 +++ node-exporter/kustomization.yaml | 6 + prometheus/argocd/prometheus.yaml | 50 +++++ .../helm-values/kube-prometheus-stack.yaml | 191 ++++++++++++++++++ prometheus/kustomization.yaml | 7 + prometheus/vault/postgresql-password.yaml | 18 ++ promtail/argocd/promtail.yaml | 26 +++ promtail/helm-values/promtail.yaml | 49 +++++ promtail/kustomization.yaml | 6 + 23 files changed, 866 insertions(+) create mode 100644 alertmanager/argocd/alertmanager.yaml create mode 100644 alertmanager/helm-values/alertmanager.yaml create mode 100644 alertmanager/kustomization.yaml create mode 100644 grafana/argocd/grafana.yaml create mode 100644 grafana/helm-values/grafana.yaml create mode 100644 grafana/kustomization.yaml create mode 100644 grafana/vault/grafana-admin-password.yaml create mode 100644 kube-state-metrics/argocd/kube-state-metrics.yaml create mode 100644 kube-state-metrics/helm-values/kube-state-metrics.yaml create mode 100644 kube-state-metrics/kustomization.yaml create mode 100644 loki/argocd/loki.yaml create mode 100644 loki/helm-values/loki.yaml create mode 100644 loki/kustomization.yaml create mode 100644 node-exporter/argocd/node-exporter.yaml create 
mode 100644 node-exporter/helm-values/node-exporter.yaml
 create mode 100644 node-exporter/kustomization.yaml
 create mode 100644 prometheus/argocd/prometheus.yaml
 create mode 100644 prometheus/helm-values/kube-prometheus-stack.yaml
 create mode 100644 prometheus/kustomization.yaml
 create mode 100644 prometheus/vault/postgresql-password.yaml
 create mode 100644 promtail/argocd/promtail.yaml
 create mode 100644 promtail/helm-values/promtail.yaml
 create mode 100644 promtail/kustomization.yaml

diff --git a/alertmanager/argocd/alertmanager.yaml b/alertmanager/argocd/alertmanager.yaml
new file mode 100644
index 0000000..703705a
--- /dev/null
+++ b/alertmanager/argocd/alertmanager.yaml
@@ -0,0 +1,46 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: alertmanager
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+
+  sources:
+    # Source 1: the alertmanager Helm chart; its values come from the $values ref below
+    - repoURL: https://prometheus-community.github.io/helm-charts
+      chart: alertmanager
+      targetRevision: 1.29.0
+      helm:
+        valueFiles:
+          - $values/alertmanager/helm-values/alertmanager.yaml
+    # Source 2: this Git repository, exposed to source 1 as $values
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      ref: values
+
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: monitoring
+
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+      allowEmpty: false
+
+    syncOptions:
+      - CreateNamespace=true
+      - PrunePropagationPolicy=foreground
+      - PruneLast=true
+
+    retry:
+      limit: 5
+      backoff:
+        duration: 5s
+        factor: 2
+        maxDuration: 3m
+
+  revisionHistoryLimit: 10
diff --git a/alertmanager/helm-values/alertmanager.yaml b/alertmanager/helm-values/alertmanager.yaml
new file mode 100644
index 0000000..1fe2088
--- /dev/null
+++ b/alertmanager/helm-values/alertmanager.yaml
@@ -0,0 +1,54 @@
+# Alertmanager Helm Values
+# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/alertmanager
+
+fullnameOverride: alertmanager
+
+persistence:
+  enabled: true
+  size: 1Gi
+  storageClass: local-path
+
+resources:
+  requests:
+    cpu: 10m
+    memory: 32Mi
+
+# Prometheus ServiceMonitor settings
+serviceMonitor:
+  enabled: true
+  additionalLabels:
+    release: prometheus
+  namespace: monitoring
+
+config:
+  global:
+    resolve_timeout: 5m
+  route:
+    group_by: ["alertname", "cluster", "service"]
+    group_wait: 10s
+    group_interval: 10s
+    repeat_interval: 12h
+    receiver: "default"
+    routes:
+      - matchers:
+          - 'severity = "critical"'
+        receiver: "critical"
+        continue: true
+      - matchers:
+          - 'severity = "warning"'
+        receiver: "warning"
+  receivers:
+    - name: "default"
+      # Default receiver (no notification channel configured)
+    - name: "critical"
+      # TODO: add notification channels such as Slack or email
+      # webhook_configs:
+      #   - url: 'http://your-webhook-url'
+    - name: "warning"
+      # TODO: add a notification channel for warnings
+  inhibit_rules:
+    - source_matchers:
+        - 'severity = "critical"'
+      target_matchers:
+        - 'severity = "warning"'
+      equal: ["alertname", "cluster", "service"]
diff --git a/alertmanager/kustomization.yaml b/alertmanager/kustomization.yaml
new file mode 100644
index 0000000..2dfb0c4
--- /dev/null
+++ b/alertmanager/kustomization.yaml
@@ -0,0 +1,6 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # ArgoCD Application resources are managed in infrastructure/kustomization.yaml
+  # - argocd/alertmanager.yaml
diff --git a/grafana/argocd/grafana.yaml b/grafana/argocd/grafana.yaml
new file mode 100644
index 0000000..145ddbb
--- /dev/null
+++ b/grafana/argocd/grafana.yaml
@@ -0,0 +1,50 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: grafana
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+
+  sources:
+    # Helm chart from external repository
+    - repoURL: https://grafana.github.io/helm-charts
+      chart: grafana
+      targetRevision: 10.3.0
+      helm:
+        valueFiles:
+          - $values/grafana/helm-values/grafana.yaml
+    # Values file from Git repository
+    - repoURL:
https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      ref: values
+    # Third source: the grafana/ directory of the same repository (vault secrets)
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      path: grafana
+
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: monitoring
+
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+      allowEmpty: false
+
+    syncOptions:
+      - CreateNamespace=true
+      - PrunePropagationPolicy=foreground
+      - PruneLast=true
+
+    retry:
+      limit: 5
+      backoff:
+        duration: 5s
+        factor: 2
+        maxDuration: 3m
+
+  revisionHistoryLimit: 10
diff --git a/grafana/helm-values/grafana.yaml b/grafana/helm-values/grafana.yaml
new file mode 100644
index 0000000..7fd8ab7
--- /dev/null
+++ b/grafana/helm-values/grafana.yaml
@@ -0,0 +1,63 @@
+# Grafana Helm Values
+# Chart: https://github.com/grafana/helm-charts/tree/main/charts/grafana
+
+fullnameOverride: grafana
+
+admin:
+  user: bluemayne
+  # Credentials are read from the grafana-admin-password Secret (ExternalSecret in grafana/vault/)
+  existingSecret: grafana-admin-password
+  userKey: admin-user
+  passwordKey: admin-password
+
+persistence:
+  enabled: true
+  size: 2Gi
+  storageClass: local-path
+
+initChownData:
+  enabled: false
+
+podSecurityContext:
+  fsGroup: 472
+  fsGroupChangePolicy: "Always"
+
+resources:
+  requests:
+    cpu: 25m
+    memory: 128Mi
+
+service:
+  type: ClusterIP
+  port: 80
+
+datasources:
+  datasources.yaml:
+    apiVersion: 1
+    datasources:
+      - name: Prometheus
+        type: prometheus
+        access: proxy
+        url: http://prometheus-kube-prometheus-prometheus:9090
+        isDefault: true
+        editable: true
+      - name: Loki
+        type: loki
+        access: proxy
+        url: http://loki.logging.svc.cluster.local:3100
+        editable: true
+
+grafana.ini:
+  server:
+    root_url: "http://grafana0213.kro.kr"
+  auth.anonymous:
+    enabled: false
+  security:
+    allow_embedding: true
+  auth.basic:
+    enabled: false
+  auth:
+    disable_login_form: false
+    disable_signout_menu: true
+  news:
+    news_feed_enabled: false
diff --git
a/grafana/kustomization.yaml b/grafana/kustomization.yaml
new file mode 100644
index 0000000..f6e9d69
--- /dev/null
+++ b/grafana/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # ArgoCD Application resources are managed in infrastructure/kustomization.yaml
+  # - argocd/grafana.yaml
+  - vault/grafana-admin-password.yaml
diff --git a/grafana/vault/grafana-admin-password.yaml b/grafana/vault/grafana-admin-password.yaml
new file mode 100644
index 0000000..3d4a9c6
--- /dev/null
+++ b/grafana/vault/grafana-admin-password.yaml
@@ -0,0 +1,22 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+  name: grafana-admin-password
+  namespace: monitoring
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault-backend
+  target:
+    name: grafana-admin-password
+    creationPolicy: Owner
+  data:
+    - secretKey: admin-user
+      remoteRef:
+        key: monitoring/grafana
+        property: ADMIN_USER
+    - secretKey: admin-password
+      remoteRef:
+        key: monitoring/grafana
+        property: ADMIN_PASSWORD
diff --git a/kube-state-metrics/argocd/kube-state-metrics.yaml b/kube-state-metrics/argocd/kube-state-metrics.yaml
new file mode 100644
index 0000000..4f75acc
--- /dev/null
+++ b/kube-state-metrics/argocd/kube-state-metrics.yaml
@@ -0,0 +1,46 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: kube-state-metrics
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+
+  sources:
+    # Source 1: the kube-state-metrics Helm chart; its values come from the $values ref below
+    - repoURL: https://prometheus-community.github.io/helm-charts
+      chart: kube-state-metrics
+      targetRevision: 5.25.1
+      helm:
+        valueFiles:
+          - $values/kube-state-metrics/helm-values/kube-state-metrics.yaml
+    # Source 2: this Git repository, exposed to source 1 as $values
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      ref: values
+
+  destination:
+    server: https://kubernetes.default.svc
+    namespace:
monitoring
+
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+      allowEmpty: false
+
+    syncOptions:
+      - CreateNamespace=true
+      - PrunePropagationPolicy=foreground
+      - PruneLast=true
+
+    retry:
+      limit: 5
+      backoff:
+        duration: 5s
+        factor: 2
+        maxDuration: 3m
+
+  revisionHistoryLimit: 10
diff --git a/kube-state-metrics/helm-values/kube-state-metrics.yaml b/kube-state-metrics/helm-values/kube-state-metrics.yaml
new file mode 100644
index 0000000..bd65f0e
--- /dev/null
+++ b/kube-state-metrics/helm-values/kube-state-metrics.yaml
@@ -0,0 +1,24 @@
+# Kube State Metrics Helm Values
+# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-state-metrics
+
+fullnameOverride: kube-state-metrics
+
+resources:
+  requests:
+    cpu: 10m
+    memory: 64Mi
+
+service:
+  type: ClusterIP
+  clusterIP: None
+
+# Prometheus ServiceMonitor settings
+prometheus:
+  monitor:
+    enabled: true
+    additionalLabels:
+      release: prometheus
+    namespace: monitoring
+    relabelings:
+      - targetLabel: cluster
+        replacement: "mayne-cluster"
diff --git a/kube-state-metrics/kustomization.yaml b/kube-state-metrics/kustomization.yaml
new file mode 100644
index 0000000..491ceaa
--- /dev/null
+++ b/kube-state-metrics/kustomization.yaml
@@ -0,0 +1,6 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # ArgoCD Application resources are managed in infrastructure/kustomization.yaml
+  # - argocd/kube-state-metrics.yaml
diff --git a/loki/argocd/loki.yaml b/loki/argocd/loki.yaml
new file mode 100644
index 0000000..84fdfd0
--- /dev/null
+++ b/loki/argocd/loki.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: loki
+  namespace: argocd
+spec:
+  project: default
+  sources:
+    - repoURL: https://grafana.github.io/helm-charts
+      chart: loki
+      targetRevision: 6.24.0
+      helm:
+        valueFiles:
+          - $values/loki/helm-values/loki.yaml
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      ref: values
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: logging
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
diff --git a/loki/helm-values/loki.yaml b/loki/helm-values/loki.yaml
new file mode 100644
index 0000000..3448170
--- /dev/null
+++ b/loki/helm-values/loki.yaml
@@ -0,0 +1,78 @@
+# Loki Helm Values
+# Chart: https://grafana.github.io/helm-charts
+# Simple single binary deployment
+
+loki:
+  # Use filesystem storage (simple setup)
+  storage:
+    type: filesystem
+
+  # Single binary mode for simplicity
+  commonConfig:
+    replication_factor: 1
+
+  # Schema config
+  schemaConfig:
+    configs:
+      - from: "2024-01-01"
+        store: tsdb
+        object_store: filesystem
+        schema: v13
+        index:
+          prefix: index_
+          period: 24h
+
+  # Limits
+  limits_config:
+    retention_period: 168h # 7 days; NOTE(review): confirm compactor retention is enabled, else not enforced
+    ingestion_rate_mb: 10
+    ingestion_burst_size_mb: 20
+    max_streams_per_user: 10000
+
+  # Auth disabled for simplicity
+  auth_enabled: false
+
+# Use single binary deployment (simpler)
+deploymentMode: SingleBinary
+
+singleBinary:
+  replicas: 1
+  persistence:
+    enabled: true
+    size: 10Gi
+    storageClass: local-path
+  resources:
+    requests:
+      cpu: 100m
+      memory: 256Mi
+
+# Disable components not needed in single binary mode
+backend:
+  replicas: 0
+read:
+  replicas: 0
+write:
+  replicas: 0
+
+# Gateway disabled (direct access)
+gateway:
+  enabled: false
+
+# Disable all caching (use simple mode)
+chunksCache:
+  enabled: false
+resultsCache:
+  enabled: false
+
+# Disable monitoring components (lokiCanary is a root-level key in the 6.x chart)
+monitoring:
+  selfMonitoring:
+    enabled: false
+    grafanaAgent:
+      installOperator: false
+lokiCanary:
+  enabled: false
+
+# Test disabled
+test:
+  enabled: false
diff --git a/loki/kustomization.yaml b/loki/kustomization.yaml
new file mode 100644
index 0000000..28abfdd
--- /dev/null
+++ b/loki/kustomization.yaml
@@ -0,0 +1,6 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # ArgoCD Application resources: see
infrastructure/kustomization.yaml, which manages them
+  # - argocd/loki.yaml
diff --git a/node-exporter/argocd/node-exporter.yaml b/node-exporter/argocd/node-exporter.yaml
new file mode 100644
index 0000000..680e7cc
--- /dev/null
+++ b/node-exporter/argocd/node-exporter.yaml
@@ -0,0 +1,46 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: node-exporter
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+
+  sources:
+    # Source 1: the prometheus-node-exporter Helm chart; values come from the $values ref below
+    - repoURL: https://prometheus-community.github.io/helm-charts
+      chart: prometheus-node-exporter
+      targetRevision: 4.39.0
+      helm:
+        valueFiles:
+          - $values/node-exporter/helm-values/node-exporter.yaml
+    # Source 2: this Git repository, exposed to source 1 as $values
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      ref: values
+
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: monitoring
+
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+      allowEmpty: false
+
+    syncOptions:
+      - CreateNamespace=true
+      - PrunePropagationPolicy=foreground
+      - PruneLast=true
+
+    retry:
+      limit: 5
+      backoff:
+        duration: 5s
+        factor: 2
+        maxDuration: 3m
+
+  revisionHistoryLimit: 10
diff --git a/node-exporter/helm-values/node-exporter.yaml b/node-exporter/helm-values/node-exporter.yaml
new file mode 100644
index 0000000..bdee1f3
--- /dev/null
+++ b/node-exporter/helm-values/node-exporter.yaml
@@ -0,0 +1,33 @@
+# Prometheus Node Exporter Helm Values
+# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-node-exporter
+
+fullnameOverride: node-exporter
+
+hostNetwork: true
+hostPID: true
+
+resources:
+  requests:
+    cpu: 10m
+    memory: 50Mi
+
+service:
+  type: ClusterIP
+  clusterIP: None
+
+# Prometheus ServiceMonitor settings
+prometheus:
+  monitor:
+    enabled: true
+    additionalLabels:
+      release: prometheus
+    namespace: monitoring
+    attachMetadata:
+      node: true
+    relabelings:
+      - targetLabel: cluster
+        replacement: "mayne-cluster"
+
+tolerations:
+  - effect: NoSchedule
+    operator: Exists
diff --git a/node-exporter/kustomization.yaml b/node-exporter/kustomization.yaml
new file mode 100644
index 0000000..9e0f18d
--- /dev/null
+++ b/node-exporter/kustomization.yaml
@@ -0,0 +1,6 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # ArgoCD Application resources are managed in infrastructure/kustomization.yaml
+  # - argocd/node-exporter.yaml
diff --git a/prometheus/argocd/prometheus.yaml b/prometheus/argocd/prometheus.yaml
new file mode 100644
index 0000000..bcb20e2
--- /dev/null
+++ b/prometheus/argocd/prometheus.yaml
@@ -0,0 +1,50 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: prometheus
+  namespace: argocd
+  finalizers:
+    - resources-finalizer.argocd.argoproj.io
+spec:
+  project: default
+
+  sources:
+    # Source 1: the kube-prometheus-stack Helm chart; values come from the $values ref below
+    - repoURL: https://prometheus-community.github.io/helm-charts
+      chart: kube-prometheus-stack
+      targetRevision: 80.0.0
+      helm:
+        valueFiles:
+          - $values/prometheus/helm-values/kube-prometheus-stack.yaml
+    # Source 2: this Git repository, exposed to source 1 as $values
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      ref: values
+    # Source 3: the prometheus/ directory of the same repository (vault secrets)
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      path: prometheus
+
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: monitoring
+
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+      allowEmpty: false
+
+    syncOptions:
+      - CreateNamespace=true
+      - PrunePropagationPolicy=foreground
+      - PruneLast=true
+
+    retry:
+      limit: 5
+      backoff:
+        duration: 5s
+        factor: 2
+        maxDuration: 3m
+
+  revisionHistoryLimit: 10
diff --git a/prometheus/helm-values/kube-prometheus-stack.yaml b/prometheus/helm-values/kube-prometheus-stack.yaml
new file mode 100644
index 0000000..9998064
--- /dev/null
+++ b/prometheus/helm-values/kube-prometheus-stack.yaml
@@ -0,0 +1,191
@@
+# Kube-Prometheus-Stack Helm Values
+# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
+# Includes: Prometheus Operator, Prometheus, Alertmanager, Grafana, and various exporters
+
+# Global settings
+fullnameOverride: ""
+
+# CRD management
+crds:
+  enabled: false # CRDs are installed manually to avoid the annotation size problem
+
+# Prometheus Operator
+prometheusOperator:
+  enabled: true
+  # Disable CRD creation
+  createCustomResource: false
+
+# Kubelet ServiceMonitor with cluster label
+kubelet:
+  enabled: true
+  serviceMonitor:
+    # cAdvisor metrics (container_memory_working_set_bytes, etc.)
+    cAdvisorRelabelings:
+      - targetLabel: cluster
+        replacement: "mayne-cluster"
+    # Resource metrics
+    resourceRelabelings:
+      - targetLabel: cluster
+        replacement: "mayne-cluster"
+    # Probes metrics
+    probesRelabelings:
+      - targetLabel: cluster
+        replacement: "mayne-cluster"
+
+# Alertmanager (use the existing alertmanager)
+alertmanager:
+  enabled: false
+
+# Grafana (use the existing grafana)
+grafana:
+  enabled: false
+
+# Node Exporter (use the existing node-exporter)
+nodeExporter:
+  enabled: false
+
+# Kube State Metrics (use the existing kube-state-metrics)
+# The separately deployed kube-state-metrics creates its own ServiceMonitor
+kubeStateMetrics:
+  enabled: false
+
+# Default alerting/recording rule groups
+defaultRules:
+  create: true
+  rules:
+    alertmanager: true
+    etcd: false
+    configReloaders: true
+    general: true
+    k8s: true
+    kubeApiserverAvailability: true
+    kubeApiserverBurnrate: true
+    kubeApiserverHistogram: true
+    kubeApiserverSlos: true
+    kubeControllerManager: false
+    kubelet: true
+    kubeProxy: false
+    kubePrometheusGeneral: true
+    kubePrometheusNodeRecording: true
+    kubernetesApps: true
+    kubernetesResources: true
+    kubernetesStorage: true
+    kubernetesSystem: true
+    kubeSchedulerAlerting: false
+    kubeSchedulerRecording: false
+    kubeStateMetrics: true
+    network: true
+    node: true
+    nodeExporterAlerting: true
+    nodeExporterRecording: true
+    prometheus: true
+    prometheusOperator: true
+
+# Prometheus
+# Every Prometheus setting lives under this single key: YAML mapping keys must
+# be unique, and most parsers keep only the last of two duplicate keys.
+prometheus:
+  enabled: true
+
+  prometheusSpec:
+    scrapeInterval: 30s
+    evaluationInterval: 30s
+    retention: 7d
+
+    storageSpec:
+      volumeClaimTemplate:
+        spec:
+          storageClassName: local-path
+          accessModes: ["ReadWriteOnce"]
+          resources:
+            requests:
+              storage: 5Gi
+
+    resources:
+      requests:
+        cpu: 50m
+        memory: 256Mi
+
+    # ServiceMonitor auto-discovery - select all ServiceMonitors
+    serviceMonitorSelectorNilUsesHelmValues: false
+    serviceMonitorSelector: {}
+    podMonitorSelectorNilUsesHelmValues: false
+    podMonitorSelector: {}
+    probeSelectorNilUsesHelmValues: false
+    ruleSelector: {}
+
+    # Alertmanager settings
+    alertingEndpoints:
+      - name: alertmanager
+        namespace: monitoring
+        port: http-web # NOTE(review): confirm the standalone alertmanager Service names its port http-web
+        scheme: http
+
+    # External labels added to all metrics
+    externalLabels:
+      cluster: "mayne-cluster"
+
+    # Additional scrape configs for existing services
+    additionalScrapeConfigs:
+      # ArgoCD metrics
+      - job_name: 'argocd-metrics'
+        static_configs:
+          - targets:
+              - 'argocd-metrics.argocd.svc.cluster.local:8082'
+            labels:
+              service: argocd-controller
+          - targets:
+              - 'argocd-server-metrics.argocd.svc.cluster.local:8083'
+            labels:
+              service: argocd-server
+          - targets:
+              - 'argocd-repo-server.argocd.svc.cluster.local:8084'
+            labels:
+              service: argocd-repo
+
+      # Cert-Manager
+      - job_name: 'cert-manager'
+        static_configs:
+          - targets:
+              - 'cert-manager.cert-manager.svc.cluster.local:9402'
+
+      # MinIO
+      - job_name: 'minio-cluster'
+        static_configs:
+          - targets:
+              - 'minio.minio.svc.cluster.local:9000'
+        metrics_path: /minio/v2/metrics/cluster
+        scheme: http
+
+      - job_name: 'minio-node'
+        static_configs:
+          - targets:
+              - 'minio.minio.svc.cluster.local:9000'
+        metrics_path: /minio/v2/metrics/node
+        scheme: http
+
+      # Ingress NGINX
+      - job_name: 'ingress-nginx'
+        kubernetes_sd_configs:
+          - role: pod
+            namespaces:
+              names:
+                - ingress-nginx
+        relabel_configs:
+          - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name]
+            action: keep
+            regex: ingress-nginx
+          - source_labels:
[__meta_kubernetes_pod_label_app_kubernetes_io_component]
+            action: keep
+            regex: controller
+          - source_labels: [__address__]
+            action: replace
+            regex: ([^:]+)(?::\d+)?
+            replacement: $1:10254
+            target_label: __address__
+          - source_labels: [__meta_kubernetes_pod_name]
+            action: replace
+            target_label: pod
+          - source_labels: [__meta_kubernetes_namespace]
+            action: replace
+            target_label: namespace
diff --git a/prometheus/kustomization.yaml b/prometheus/kustomization.yaml
new file mode 100644
index 0000000..6deef9e
--- /dev/null
+++ b/prometheus/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # ArgoCD Application resources are managed in infrastructure/kustomization.yaml
+  # - argocd/prometheus.yaml
+  - vault/postgresql-password.yaml
diff --git a/prometheus/vault/postgresql-password.yaml b/prometheus/vault/postgresql-password.yaml
new file mode 100644
index 0000000..c0dd249
--- /dev/null
+++ b/prometheus/vault/postgresql-password.yaml
@@ -0,0 +1,18 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+  name: postgresql-password
+  namespace: monitoring
+spec:
+  refreshInterval: 1h
+  secretStoreRef:
+    kind: ClusterSecretStore
+    name: vault-backend
+  target:
+    name: postgresql-password
+    creationPolicy: Owner
+  data:
+    - secretKey: password
+      remoteRef:
+        key: monitoring/postgres
+        property: PASSWORD
diff --git a/promtail/argocd/promtail.yaml b/promtail/argocd/promtail.yaml
new file mode 100644
index 0000000..1b0e414
--- /dev/null
+++ b/promtail/argocd/promtail.yaml
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: promtail
+  namespace: argocd
+spec:
+  project: default
+  sources:
+    - repoURL: https://grafana.github.io/helm-charts
+      chart: promtail
+      targetRevision: 6.16.6
+      helm:
+        valueFiles:
+          - $values/promtail/helm-values/promtail.yaml
+    - repoURL: https://gitea0213.kro.kr/bluemayne/infrastructure.git
+      targetRevision: main
+      ref: values
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: logging
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
diff --git a/promtail/helm-values/promtail.yaml b/promtail/helm-values/promtail.yaml
new file mode 100644
index 0000000..b8b495c
--- /dev/null
+++ b/promtail/helm-values/promtail.yaml
@@ -0,0 +1,49 @@
+# Promtail Helm Values
+# Chart: https://grafana.github.io/helm-charts
+# Log collector agent (DaemonSet on all nodes)
+
+# Loki endpoint
+config:
+  clients:
+    - url: http://loki.logging.svc.cluster.local:3100/loki/api/v1/push
+
+# Default scrape config (use defaults)
+defaultVolumes:
+  - name: run
+    hostPath:
+      path: /run/promtail
+  - name: containers
+    hostPath:
+      path: /var/lib/docker/containers # NOTE(review): Docker-specific path; confirm the nodes' container runtime
+  - name: pods
+    hostPath:
+      path: /var/log/pods
+
+defaultVolumeMounts:
+  - name: run
+    mountPath: /run/promtail
+  - name: containers
+    mountPath: /var/lib/docker/containers
+    readOnly: true
+  - name: pods
+    mountPath: /var/log/pods
+    readOnly: true
+
+# Resources
+resources:
+  requests:
+    cpu: 50m
+    memory: 64Mi
+
+# Tolerations to run on all nodes including master
+tolerations:
+  - key: node-role.kubernetes.io/master
+    operator: Exists
+    effect: NoSchedule
+  - key: node-role.kubernetes.io/control-plane
+    operator: Exists
+    effect: NoSchedule
+
+# ServiceMonitor disabled
+serviceMonitor:
+  enabled: false
diff --git a/promtail/kustomization.yaml b/promtail/kustomization.yaml
new file mode 100644
index 0000000..611ba0a
--- /dev/null
+++ b/promtail/kustomization.yaml
@@ -0,0 +1,6 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  # ArgoCD Application resources are managed in infrastructure/kustomization.yaml
+  # - argocd/promtail.yaml