PERF(observability): reduce replicas, add priority

- Reduce Prometheus replicas from 2 to 1
- Reduce Grafana replicas from 2 to 1
- Reduce Blackbox-exporter replicas from 2 to 1
- Move Loki, Thanos, Tempo to workers (remove control-plane tolerations and nodeSelector)
- Set priorityClassName: medium-priority on Prometheus, Loki, Thanos, Tempo
This commit is contained in:
2026-01-10 13:15:03 +09:00
parent c34f56945a
commit 9e218a8adc
6 changed files with 13 additions and 25 deletions

View File

@@ -3,7 +3,7 @@
fullnameOverride: blackbox-exporter fullnameOverride: blackbox-exporter
replicas: 2 replicas: 1
resources: resources:
requests: requests:

View File

@@ -3,7 +3,7 @@
fullnameOverride: grafana fullnameOverride: grafana
replicas: 2 replicas: 1
affinity: affinity:
podAntiAffinity: podAntiAffinity:

View File

@@ -56,13 +56,8 @@ singleBinary:
extraVolumeMounts: extraVolumeMounts:
- name: data - name: data
mountPath: /var/loki mountPath: /var/loki
# Run on master node for stability # Medium priority for observability
tolerations: priorityClassName: medium-priority
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: "true"
resources: resources:
requests: requests:
cpu: 23m cpu: 23m

View File

@@ -70,10 +70,13 @@ prometheus:
# Enable remote write receiver for OTel Collector # Enable remote write receiver for OTel Collector
enableRemoteWriteReceiver: true enableRemoteWriteReceiver: true
# HA: 2 replicas on different worker nodes # Single replica (HA removed for resource optimization)
replicas: 2 replicas: 1
replicaExternalLabelName: prometheus_replica replicaExternalLabelName: prometheus_replica
# Medium priority for observability
priorityClassName: medium-priority
# Pod anti-affinity for HA # Pod anti-affinity for HA
affinity: affinity:
podAntiAffinity: podAntiAffinity:

View File

@@ -7,13 +7,8 @@
# - OTLP receiver for OpenTelemetry data # - OTLP receiver for OpenTelemetry data
# - Integrates with Grafana for trace visualization # - Integrates with Grafana for trace visualization
# Run on master node for stability # Medium priority for observability
tolerations: priorityClassName: medium-priority
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: "true"
# ============================================================================= # =============================================================================
# Resource Limits (optimized for small cluster) # Resource Limits (optimized for small cluster)

View File

@@ -29,13 +29,8 @@ query:
enabled: true enabled: true
replicaCount: 1 replicaCount: 1
# Run on master node for stability # Medium priority for observability
tolerations: priorityClassName: medium-priority
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: "true"
# Deduplicate metrics from multiple Prometheus replicas # Deduplicate metrics from multiple Prometheus replicas
dnsDiscovery: dnsDiscovery: