# Change note: the Compactor was OOMKilled with a 128Mi memory limit, so its
# limit is set to 256Mi for stability during compaction.
# File listing: 152 lines, 4.4 KiB, YAML.

# Thanos Helm Values
# Chart: https://github.com/bitnami/charts/tree/main/bitnami/thanos
#
# Architecture:
#   - Prometheus (prometheus namespace) + Sidecar → uploads to MinIO
#   - Query: queries Sidecar + Store Gateway, deduplicates data
#   - Store Gateway: reads historical data from MinIO
#   - Compactor: compacts and downsamples data in MinIO

# Allow non-Bitnami images (this file uses quay.io/thanos/thanos).
global:
  security:
    allowInsecureImages: true

# Use the quay.io image to avoid Docker Hub rate limits.
image:
  registry: quay.io
  repository: thanos/thanos
  tag: v0.37.2

# Object storage configuration (MinIO S3).
# Uses the Secret created by an ExternalSecret.
existingObjstoreSecret: thanos-objstore-secret

# =============================================================================
# Query - Main query endpoint (Grafana connects here)
# =============================================================================
query:
  enabled: true
  replicaCount: 1

  # Run on the control-plane node for stability: tolerate its NoSchedule taint
  # and pin the pod there via nodeSelector.
  # NOTE(review): the selector requires the label value "true"; some
  # distributions set this label with an empty value - confirm for this cluster.
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: "true"

  # Discover the Prometheus Thanos sidecars through the discovery Service in
  # the prometheus namespace.
  dnsDiscovery:
    enabled: true
    sidecarsService: prometheus-kube-prometheus-thanos-discovery
    sidecarsNamespace: prometheus

  # Store endpoints - the chart automatically adds the Store Gateway,
  # so there is no need to specify it manually.

  # Deduplication settings: deduplicate metrics from multiple Prometheus
  # replicas using the prometheus_replica label.
  extraFlags:
    - --query.replica-label=prometheus_replica
    - --query.auto-downsampling

  resources:
    requests:
      cpu: 15m
      memory: 128Mi
    limits:
      memory: 128Mi

# =============================================================================
# Query Frontend - Caching layer for Query (optional, disabled for small cluster)
# =============================================================================
queryFrontend:
  enabled: false

# =============================================================================
# Store Gateway - Reads historical data from S3
# =============================================================================
storegateway:
  enabled: true
  replicaCount: 1

  # Run on the control-plane node for stability: tolerate its NoSchedule taint
  # and pin the pod there via nodeSelector.
  # NOTE(review): the selector requires the label value "true"; some
  # distributions set this label with an empty value - confirm for this cluster.
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: "true"

  resources:
    requests:
      cpu: 15m
      memory: 128Mi
    limits:
      memory: 128Mi

  # Persistent volume for the Store Gateway, provisioned from the local-path
  # storage class.
  persistence:
    enabled: true
    storageClass: local-path
    size: 2Gi

# =============================================================================
# Compactor - Compacts and downsamples data in S3
# =============================================================================
compactor:
  enabled: true

  # Run on the control-plane node for stability: tolerate its NoSchedule taint
  # and pin the pod there via nodeSelector.
  # NOTE(review): the selector requires the label value "true"; some
  # distributions set this label with an empty value - confirm for this cluster.
  tolerations:
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  nodeSelector:
    node-role.kubernetes.io/control-plane: "true"

  # Retention settings
  retentionResolutionRaw: 7d  # Keep raw data for 7 days
  retentionResolution5m: 30d  # Keep 5m downsampled for 30 days
  retentionResolution1h: 90d  # Keep 1h downsampled for 90 days

  # Memory is 256Mi because the Compactor was OOMKilled with a 128Mi limit.
  resources:
    requests:
      cpu: 15m
      memory: 256Mi
    limits:
      memory: 256Mi

  # Persistent volume for the Compactor, provisioned from the local-path
  # storage class.
  persistence:
    enabled: true
    storageClass: local-path
    size: 2Gi

# =============================================================================
# Ruler - Alerting rules evaluation (disabled, using Prometheus rules)
# =============================================================================
ruler:
  enabled: false

# =============================================================================
# Receive - Remote write endpoint (disabled, using Sidecar)
# =============================================================================
receive:
  enabled: false

# =============================================================================
# Sidecar - Disabled here, enabled in the Prometheus helm-values
# =============================================================================
# The sidecar is deployed alongside Prometheus via kube-prometheus-stack.

# =============================================================================
# Metrics
# =============================================================================
metrics:
  enabled: true
  # Create a ServiceMonitor in the thanos namespace.
  # NOTE(review): the release label is presumably what the Prometheus
  # ServiceMonitor selector matches on - confirm against the Prometheus values.
  serviceMonitor:
    enabled: true
    namespace: thanos
    labels:
      release: prometheus