From aecb15031db4d2e3e66df9f3aa10bd6abd5fd55b Mon Sep 17 00:00:00 2001
From: Mayne0213
Date: Thu, 8 Jan 2026 21:27:24 +0900
Subject: [PATCH] FEAT(grafana): add Thanos as default datasource

- Add Thanos Query as default Prometheus datasource
- Keep original Prometheus datasource as backup
- Thanos provides deduplicated metrics from HA Prometheus

REFACTOR(thanos): move all components to master node

- Add tolerations for control-plane:NoSchedule
- Add nodeSelector for control-plane node
- Affects: query, storegateway, compactor
- PVC will be recreated on master node (data in S3)

FIX(thanos): allow non-Bitnami images (quay.io/thanos)

FIX(thanos): correct nodeSelector value to 'true'
---
 grafana/helm-values.yaml | 10 +++++++++-
 thanos/helm-values.yaml  | 29 +++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/grafana/helm-values.yaml b/grafana/helm-values.yaml
index 2cf7e09..5eb65e2 100644
--- a/grafana/helm-values.yaml
+++ b/grafana/helm-values.yaml
@@ -54,11 +54,19 @@ datasources:
   datasources.yaml:
     apiVersion: 1
     datasources:
+      - name: Thanos
+        type: prometheus
+        access: proxy
+        url: http://thanos-query.thanos.svc.cluster.local:9090
+        isDefault: true
+        editable: true
+        jsonData:
+          timeInterval: "60s"
       - name: Prometheus
         type: prometheus
         access: proxy
         url: http://prometheus-kube-prometheus-prometheus.prometheus.svc.cluster.local:9090
-        isDefault: true
+        isDefault: false
         editable: true
       - name: Loki
         type: loki
diff --git a/thanos/helm-values.yaml b/thanos/helm-values.yaml
index 3e96530..229758f 100644
--- a/thanos/helm-values.yaml
+++ b/thanos/helm-values.yaml
@@ -7,6 +7,11 @@
 # - Store Gateway: reads historical data from MinIO
 # - Compactor: compacts and downsamples data in MinIO
 
+# Allow non-Bitnami images (quay.io/thanos/thanos)
+global:
+  security:
+    allowInsecureImages: true
+
 # Use quay.io image to avoid Docker Hub rate limits
 image:
   registry: quay.io
@@ -24,6 +29,14 @@ query:
   enabled: true
   replicaCount: 1
 
+  # Run on master node for stability
+  tolerations:
+    - key: node-role.kubernetes.io/control-plane
+      operator: Exists
+      effect: NoSchedule
+  nodeSelector:
+    node-role.kubernetes.io/control-plane: "true"
+
   # Deduplicate metrics from multiple Prometheus replicas
   dnsDiscovery:
     enabled: true
@@ -58,6 +71,14 @@ storegateway:
   enabled: true
   replicaCount: 1
 
+  # Run on master node for stability
+  tolerations:
+    - key: node-role.kubernetes.io/control-plane
+      operator: Exists
+      effect: NoSchedule
+  nodeSelector:
+    node-role.kubernetes.io/control-plane: "true"
+
   resources:
     requests:
       cpu: 15m
@@ -76,6 +97,14 @@ storegateway:
 compactor:
   enabled: true
 
+  # Run on master node for stability
+  tolerations:
+    - key: node-role.kubernetes.io/control-plane
+      operator: Exists
+      effect: NoSchedule
+  nodeSelector:
+    node-role.kubernetes.io/control-plane: "true"
+
   # Retention settings
   retentionResolutionRaw: 7d # Keep raw data for 7 days
   retentionResolution5m: 30d # Keep 5m downsampled for 30 days