From 3b5bf20902cd9bff696933d13ed98df4323d14cd Mon Sep 17 00:00:00 2001
From: Mayne0213 <bluemayne0213@icloud.com>
Date: Mon, 12 Jan 2026 01:07:58 +0900
Subject: [PATCH] PERF(observability): optimize resources via VPA

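Values are listed as request/limit. Every component below also gains an
explicit CPU limit in this patch.

- alertmanager: CPU 15m/15m, memory 100Mi/100Mi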
- blackbox-exporter: CPU 15m/32m, memory 100Mi/100Mi
- goldilocks: controller CPU 15m/25m, dashboard CPU 15m/15m, memory 100Mi/100Mi (both)
- grafana: CPU 22m/24m, memory 144Mi/242Mi (upperBound)
- kube-state-metrics: CPU 15m/15m, memory 100Mi/100Mi
- loki: CPU 10m/69m, memory 225Mi/323Mi
- node-exporter: CPU 15m/15m, memory 100Mi/100Mi
- opentelemetry: CPU 34m/410m, memory 142Mi/1024Mi
- prometheus-operator: CPU 15m/15m, memory 100Mi/100Mi
- tempo: CPU 15m/15m, memory 100Mi/109Mi
- thanos: CPU 15m/15m, memory 100Mi/126Mi
- vpa: CPU 15m/15m, memory 100Mi/100Mi
---
 alertmanager/helm-values.yaml       | 2 ++
 blackbox-exporter/helm-values.yaml  | 4 +++-
 goldilocks/helm-values.yaml         | 4 ++++
 grafana/helm-values.yaml            | 8 +++++---
 kube-state-metrics/helm-values.yaml | 6 ++++--
 loki/helm-values.yaml               | 8 +++++---
 node-exporter/helm-values.yaml      | 2 ++
 opentelemetry/helm-values.yaml      | 9 +++++----
 prometheus/helm-values.yaml         | 8 ++++++++
 tempo/helm-values.yaml              | 5 +++--
 thanos/helm-values.yaml             | 6 ++++--
 vpa/helm-values.yaml                | 2 ++
 12 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/alertmanager/helm-values.yaml b/alertmanager/helm-values.yaml
index 20e3baf..d934e87 100644
--- a/alertmanager/helm-values.yaml
+++ b/alertmanager/helm-values.yaml
@@ -21,11 +21,13 @@ affinity:
 persistence:
   enabled: false
 
+# Resource settings: requests = VPA lowerBound, limits = VPA target
 resources:
   requests:
     cpu: 15m
     memory: 100Mi
   limits:
+    cpu: 15m
     memory: 100Mi
 
 # Disable default config - use secret instead
diff --git a/blackbox-exporter/helm-values.yaml b/blackbox-exporter/helm-values.yaml
index abb3355..3ef4191 100644
--- a/blackbox-exporter/helm-values.yaml
+++ b/blackbox-exporter/helm-values.yaml
@@ -5,11 +5,13 @@ fullnameOverride: blackbox-exporter
 
 replicas: 1
 
+# Resource settings: requests = VPA lowerBound, limits = VPA upperBound
 resources:
   requests:
-    cpu: 23m
+    cpu: 15m
     memory: 100Mi
   limits:
+    cpu: 32m
     memory: 100Mi
 
 config:
diff --git a/goldilocks/helm-values.yaml b/goldilocks/helm-values.yaml
index d18bbd4..a4aed7e 100644
--- a/goldilocks/helm-values.yaml
+++ b/goldilocks/helm-values.yaml
@@ -6,11 +6,13 @@ dashboard:
   enabled: true
   replicaCount: 1
 
+  # Resource settings: requests = VPA lowerBound, limits = VPA upperBound
   resources:
     requests:
       cpu: 15m
       memory: 100Mi
     limits:
+      cpu: 15m
       memory: 100Mi
 
   service:
@@ -49,11 +51,13 @@ controller:
   enabled: true
   replicaCount: 1
 
+  # Resource settings: requests = VPA lowerBound, limits = VPA upperBound
   resources:
     requests:
       cpu: 15m
       memory: 100Mi
     limits:
+      cpu: 25m
       memory: 100Mi
 
   # Enable VPA recommendations for all namespaces
diff --git a/grafana/helm-values.yaml b/grafana/helm-values.yaml
index 38bda65..cf545bf 100644
--- a/grafana/helm-values.yaml
+++ b/grafana/helm-values.yaml
@@ -39,12 +39,14 @@ podSecurityContext:
   fsGroup: 472
   fsGroupChangePolicy: "Always"
 
+# Resource settings: requests = VPA lowerBound, limits = VPA upperBound
 resources:
   requests:
-    cpu: 23m
-    memory: 256Mi
+    cpu: 22m
+    memory: 144Mi
   limits:
-    memory: 256Mi
+    cpu: 24m
+    memory: 242Mi
 
 service:
   type: ClusterIP
diff --git a/kube-state-metrics/helm-values.yaml b/kube-state-metrics/helm-values.yaml
index c1adb8c..edd15ce 100644
--- a/kube-state-metrics/helm-values.yaml
+++ b/kube-state-metrics/helm-values.yaml
@@ -7,12 +7,14 @@ fullnameOverride: kube-state-metrics
 # Note: kube-state-metrics는 stateless이지만, 여러 replica는 동일한 메트릭을 중복 생성하므로
 # 단일 replica로 실행하는 것이 권장됩니다.
 
+# Resource settings: requests = VPA lowerBound, limits = VPA upperBound
 resources:
   requests:
     cpu: 15m
-    memory: 105Mi
+    memory: 100Mi
   limits:
-    memory: 105Mi
+    cpu: 15m
+    memory: 100Mi
 
 service:
   type: ClusterIP
diff --git a/loki/helm-values.yaml b/loki/helm-values.yaml
index 6926e18..11bd7ea 100644
--- a/loki/helm-values.yaml
+++ b/loki/helm-values.yaml
@@ -60,12 +60,14 @@ singleBinary:
       mountPath: /var/loki
   # Medium priority for observability
   priorityClassName: medium-priority
+  # Resource settings: requests = VPA lowerBound, limits = VPA target
   resources:
     requests:
-      cpu: 63m
-      memory: 363Mi
+      cpu: 10m
+      memory: 225Mi
     limits:
-      memory: 363Mi
+      cpu: 69m
+      memory: 323Mi
 
 # Disable components not needed in single binary mode
 backend:
diff --git a/node-exporter/helm-values.yaml b/node-exporter/helm-values.yaml
index 36abc23..53e32c7 100644
--- a/node-exporter/helm-values.yaml
+++ b/node-exporter/helm-values.yaml
@@ -6,11 +6,13 @@ fullnameOverride: node-exporter
 hostNetwork: true
 hostPID: true
 
+# Resource settings: requests = VPA lowerBound, limits = VPA upperBound
 resources:
   requests:
     cpu: 15m
     memory: 100Mi
   limits:
+    cpu: 15m
     memory: 100Mi
 
 service:
diff --git a/opentelemetry/helm-values.yaml b/opentelemetry/helm-values.yaml
index 7931244..c73f5f6 100644
--- a/opentelemetry/helm-values.yaml
+++ b/opentelemetry/helm-values.yaml
@@ -28,14 +28,15 @@ image:
 mode: daemonset
 
 # =============================================================================
-# Resource Limits (based on VPA recommendation)
+# Resources: requests = VPA lowerBound, limits = VPA upperBound (memory limit capped at 1024Mi)
 # =============================================================================
 resources:
   requests:
-    cpu: 25m
-    memory: 400Mi
+    cpu: 34m
+    memory: 142Mi
   limits:
-    memory: 400Mi
+    cpu: 410m
+    memory: 1024Mi
 
 # =============================================================================
 # Environment Variables
diff --git a/prometheus/helm-values.yaml b/prometheus/helm-values.yaml
index e75b12e..b6b9c69 100644
--- a/prometheus/helm-values.yaml
+++ b/prometheus/helm-values.yaml
@@ -14,6 +14,14 @@ prometheusOperator:
   enabled: true
   # CRD 생성 비활성화
   createCustomResource: false
+  # Resource settings: requests = VPA lowerBound, limits = VPA upperBound
+  resources:
+    requests:
+      cpu: 15m
+      memory: 100Mi
+    limits:
+      cpu: 15m
+      memory: 100Mi
 
 # Kubelet ServiceMonitor with cluster label
 kubelet:
diff --git a/tempo/helm-values.yaml b/tempo/helm-values.yaml
index 8eaab14..eedd287 100644
--- a/tempo/helm-values.yaml
+++ b/tempo/helm-values.yaml
@@ -17,13 +17,14 @@ replicas: 1
 # Tempo Configuration
 # =============================================================================
 tempo:
-  # Resource Limits (optimized for small cluster)
+  # Resource settings: requests = VPA lowerBound, limits = VPA target
   resources:
     requests:
       cpu: 15m
       memory: 100Mi
     limits:
-      memory: 100Mi
+      cpu: 15m
+      memory: 109Mi
   # Receivers - protocols Tempo accepts
   receivers:
     otlp:
diff --git a/thanos/helm-values.yaml b/thanos/helm-values.yaml
index 356299e..38b367d 100644
--- a/thanos/helm-values.yaml
+++ b/thanos/helm-values.yaml
@@ -46,12 +46,14 @@ query:
     - --query.replica-label=prometheus_replica
     - --query.auto-downsampling
 
+  # Resource settings: requests = VPA lowerBound, limits = VPA target
   resources:
     requests:
       cpu: 15m
-      memory: 283Mi
+      memory: 100Mi
     limits:
-      memory: 283Mi
+      cpu: 15m
+      memory: 126Mi
 
 # =============================================================================
 # Query Frontend - Caching layer for Query (optional, disabled for small cluster)
diff --git a/vpa/helm-values.yaml b/vpa/helm-values.yaml
index 8901c36..58baafe 100644
--- a/vpa/helm-values.yaml
+++ b/vpa/helm-values.yaml
@@ -6,11 +6,13 @@ recommender:
   enabled: true
   replicaCount: 1
 
+  # Resource settings: requests = VPA lowerBound, limits = VPA upperBound
   resources:
     requests:
       cpu: 15m
       memory: 100Mi
     limits:
+      cpu: 15m
       memory: 100Mi
 
 # Updater - applies recommended resource requests to pods