From 3b5bf20902cd9bff696933d13ed98df4323d14cd Mon Sep 17 00:00:00 2001 From: Mayne0213 Date: Mon, 12 Jan 2026 01:07:58 +0900 Subject: [PATCH] PERF(observability): optimize resources via VPA - alertmanager: CPU 15m/15m, memory 100Mi/100Mi - blackbox-exporter: CPU 15m/32m, memory 100Mi/100Mi - goldilocks: controller 15m/25m, dashboard 15m/15m - grafana: CPU 22m/24m, memory 144Mi/242Mi (upperBound) - kube-state-metrics: CPU 15m/15m, memory 100Mi/100Mi - loki: CPU 10m/69m, memory 225Mi/323Mi - node-exporter: CPU 15m/15m, memory 100Mi/100Mi - opentelemetry: CPU 34m/410m, memory 142Mi/1024Mi - prometheus-operator: CPU 15m/15m, memory 100Mi/100Mi - tempo: CPU 15m/15m, memory 100Mi/109Mi - thanos: CPU 15m/15m, memory 100Mi/126Mi - vpa: CPU 15m/15m, memory 100Mi/100Mi --- alertmanager/helm-values.yaml | 2 ++ blackbox-exporter/helm-values.yaml | 4 +++- goldilocks/helm-values.yaml | 4 ++++ grafana/helm-values.yaml | 8 +++++--- kube-state-metrics/helm-values.yaml | 6 ++++-- loki/helm-values.yaml | 8 +++++--- node-exporter/helm-values.yaml | 2 ++ opentelemetry/helm-values.yaml | 9 +++++---- prometheus/helm-values.yaml | 8 ++++++++ tempo/helm-values.yaml | 5 +++-- thanos/helm-values.yaml | 6 ++++-- vpa/helm-values.yaml | 2 ++ 12 files changed, 47 insertions(+), 17 deletions(-) diff --git a/alertmanager/helm-values.yaml b/alertmanager/helm-values.yaml index 20e3baf..d934e87 100644 --- a/alertmanager/helm-values.yaml +++ b/alertmanager/helm-values.yaml @@ -21,11 +21,13 @@ affinity: persistence: enabled: false +# Resource settings (VPA lowerBound/target) resources: requests: cpu: 15m memory: 100Mi limits: + cpu: 15m memory: 100Mi # Disable default config - use secret instead diff --git a/blackbox-exporter/helm-values.yaml b/blackbox-exporter/helm-values.yaml index abb3355..3ef4191 100644 --- a/blackbox-exporter/helm-values.yaml +++ b/blackbox-exporter/helm-values.yaml @@ -5,11 +5,13 @@ fullnameOverride: blackbox-exporter replicas: 1 +# Resource settings (VPA lowerBound/upperBound) resources: requests: - cpu: 23m + cpu: 15m memory: 100Mi limits: + cpu: 32m memory: 100Mi config: diff --git a/goldilocks/helm-values.yaml b/goldilocks/helm-values.yaml index d18bbd4..a4aed7e 100644 --- a/goldilocks/helm-values.yaml +++ b/goldilocks/helm-values.yaml @@ -6,11 +6,13 @@ dashboard: enabled: true replicaCount: 1 + # Resource settings (VPA lowerBound/upperBound) resources: requests: cpu: 15m memory: 100Mi limits: + cpu: 15m memory: 100Mi service: @@ -49,11 +51,13 @@ controller: enabled: true replicaCount: 1 + # Resource settings (VPA lowerBound/upperBound) resources: requests: cpu: 15m memory: 100Mi limits: + cpu: 25m memory: 100Mi # Enable VPA recommendations for all namespaces diff --git a/grafana/helm-values.yaml b/grafana/helm-values.yaml index 38bda65..cf545bf 100644 --- a/grafana/helm-values.yaml +++ b/grafana/helm-values.yaml @@ -39,12 +39,14 @@ podSecurityContext: fsGroup: 472 fsGroupChangePolicy: "Always" +# Resource settings (VPA lowerBound/upperBound) resources: requests: - cpu: 23m - memory: 256Mi + cpu: 22m + memory: 144Mi limits: - memory: 256Mi + cpu: 24m + memory: 242Mi service: type: ClusterIP diff --git a/kube-state-metrics/helm-values.yaml b/kube-state-metrics/helm-values.yaml index c1adb8c..edd15ce 100644 --- a/kube-state-metrics/helm-values.yaml +++ b/kube-state-metrics/helm-values.yaml @@ -7,12 +7,14 @@ fullnameOverride: kube-state-metrics # Note: kube-state-metrics는 stateless이지만, 여러 replica는 동일한 메트릭을 중복 생성하므로 # 단일 replica로 실행하는 것이 권장됩니다. +# Resource settings (VPA lowerBound/upperBound) resources: requests: cpu: 15m - memory: 105Mi + memory: 100Mi limits: - memory: 105Mi + cpu: 15m + memory: 100Mi service: type: ClusterIP diff --git a/loki/helm-values.yaml b/loki/helm-values.yaml index 6926e18..11bd7ea 100644 --- a/loki/helm-values.yaml +++ b/loki/helm-values.yaml @@ -60,12 +60,14 @@ singleBinary: mountPath: /var/loki # Medium priority for observability priorityClassName: medium-priority + # Resource settings (VPA lowerBound/target) resources: requests: - cpu: 63m - memory: 363Mi + cpu: 10m + memory: 225Mi limits: - memory: 363Mi + cpu: 69m + memory: 323Mi # Disable components not needed in single binary mode backend: diff --git a/node-exporter/helm-values.yaml b/node-exporter/helm-values.yaml index 36abc23..53e32c7 100644 --- a/node-exporter/helm-values.yaml +++ b/node-exporter/helm-values.yaml @@ -6,11 +6,13 @@ fullnameOverride: node-exporter hostNetwork: true hostPID: true +# Resource settings (VPA lowerBound/upperBound) resources: requests: cpu: 15m memory: 100Mi limits: + cpu: 15m memory: 100Mi service: diff --git a/opentelemetry/helm-values.yaml b/opentelemetry/helm-values.yaml index 7931244..c73f5f6 100644 --- a/opentelemetry/helm-values.yaml +++ b/opentelemetry/helm-values.yaml @@ -28,14 +28,15 @@ image: mode: daemonset # ============================================================================= -# Resource Limits (based on VPA recommendation) +# Resource Limits (VPA lowerBound/upperBound, mem limit capped at 1024Mi) # ============================================================================= resources: requests: - cpu: 25m - memory: 400Mi + cpu: 34m + memory: 142Mi limits: - memory: 400Mi + cpu: 410m + memory: 1024Mi # ============================================================================= # Environment Variables diff --git a/prometheus/helm-values.yaml b/prometheus/helm-values.yaml index e75b12e..b6b9c69 100644 --- a/prometheus/helm-values.yaml +++ b/prometheus/helm-values.yaml @@ -14,6 +14,14 @@ prometheusOperator: enabled: true # CRD 생성 비활성화 createCustomResource: false + # Resource settings (VPA lowerBound/upperBound) + resources: + requests: + cpu: 15m + memory: 100Mi + limits: + cpu: 15m + memory: 100Mi # Kubelet ServiceMonitor with cluster label kubelet: diff --git a/tempo/helm-values.yaml b/tempo/helm-values.yaml index 8eaab14..eedd287 100644 --- a/tempo/helm-values.yaml +++ b/tempo/helm-values.yaml @@ -17,13 +17,14 @@ replicas: 1 # Tempo Configuration # ============================================================================= tempo: - # Resource Limits (optimized for small cluster) + # Resource settings (VPA lowerBound/target) resources: requests: cpu: 15m memory: 100Mi limits: - memory: 100Mi + cpu: 15m + memory: 109Mi # Receivers - protocols Tempo accepts receivers: otlp: diff --git a/thanos/helm-values.yaml b/thanos/helm-values.yaml index 356299e..38b367d 100644 --- a/thanos/helm-values.yaml +++ b/thanos/helm-values.yaml @@ -46,12 +46,14 @@ query: - --query.replica-label=prometheus_replica - --query.auto-downsampling + # Resource settings (VPA lowerBound/target) resources: requests: cpu: 15m - memory: 283Mi + memory: 100Mi limits: - memory: 283Mi + cpu: 15m + memory: 126Mi # ============================================================================= # Query Frontend - Caching layer for Query (optional, disabled for small cluster) diff --git a/vpa/helm-values.yaml b/vpa/helm-values.yaml index 8901c36..58baafe 100644 --- a/vpa/helm-values.yaml +++ b/vpa/helm-values.yaml @@ -6,11 +6,13 @@ recommender: enabled: true replicaCount: 1 + # Resource settings (VPA lowerBound/upperBound) resources: requests: cpu: 15m memory: 100Mi limits: + cpu: 15m memory: 100Mi # Updater - applies recommended resource requests to pods