PERF(observability): reduce replicas, add priority

- Reduce Prometheus replicas from 2 to 1
- Reduce Grafana replicas from 2 to 1
- Reduce Blackbox-exporter replicas from 2 to 1
- Move Loki, Thanos, Tempo to workers (remove control-plane tolerations and nodeSelector)
- Add medium-priority priorityClassName to Prometheus, Loki, Thanos, Tempo
This commit is contained in:
2026-01-10 13:15:03 +09:00
parent c34f56945a
commit 9e218a8adc
6 changed files with 13 additions and 25 deletions

View File

@@ -3,7 +3,7 @@
fullnameOverride: blackbox-exporter
replicas: 2
replicas: 1
resources:
requests:

View File

@@ -3,7 +3,7 @@
fullnameOverride: grafana
replicas: 2
replicas: 1
affinity:
podAntiAffinity:

View File

@@ -56,13 +56,8 @@ singleBinary:
extraVolumeMounts:
- name: data
mountPath: /var/loki
# Run on master node for stability
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: "true"
# Medium priority for observability
priorityClassName: medium-priority
resources:
requests:
cpu: 23m

View File

@@ -70,10 +70,13 @@ prometheus:
# Enable remote write receiver for OTel Collector
enableRemoteWriteReceiver: true
# HA: 2 replicas on different worker nodes
replicas: 2
# Single replica (HA removed for resource optimization)
replicas: 1
replicaExternalLabelName: prometheus_replica
# Medium priority for observability
priorityClassName: medium-priority
# Pod anti-affinity for HA
affinity:
podAntiAffinity:

View File

@@ -7,13 +7,8 @@
# - OTLP receiver for OpenTelemetry data
# - Integrates with Grafana for trace visualization
# Run on master node for stability
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: "true"
# Medium priority for observability
priorityClassName: medium-priority
# =============================================================================
# Resource Limits (optimized for small cluster)

View File

@@ -29,13 +29,8 @@ query:
enabled: true
replicaCount: 1
# Run on master node for stability
tolerations:
- key: node-role.kubernetes.io/control-plane
operator: Exists
effect: NoSchedule
nodeSelector:
node-role.kubernetes.io/control-plane: "true"
# Medium priority for observability
priorityClassName: medium-priority
# Deduplicate metrics from multiple Prometheus replicas
dnsDiscovery: