Files
observability/blackbox-exporter/helm-values.yaml
Mayne0213 7e61af372b PERF(observability): remove CPU limits for stability
- Remove CPU limits from all observability components
- Prevents CPU throttling issues across monitoring stack
2026-01-12 02:10:54 +09:00

188 lines
5.0 KiB
YAML

# Prometheus Blackbox Exporter Helm Values
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-blackbox-exporter
# Presumably pins the names of the chart-generated resources to
# `blackbox-exporter` instead of a release-derived name — confirm against
# the chart's naming helper.
fullnameOverride: blackbox-exporter
# A single exporter replica is requested.
replicas: 1
# Container resources. Memory is capped at the requested amount; no CPU limit
# is set (intentionally, for stability), only a CPU request.
resources:
  limits:
    memory: 100Mi
  requests:
    cpu: 15m
    memory: 100Mi
# Probe module definitions for the exporter.
config:
  modules:
    # HTTP GET probe over IPv4 that verifies the TLS certificate.
    http_2xx:
      prober: http
      timeout: 10s
      http:
        method: GET
        preferred_ip_protocol: ip4
        follow_redirects: true
        valid_http_versions:
          - 'HTTP/1.1'
          - 'HTTP/2.0'
        valid_status_codes:
          - 200
          - 301
          - 302
          - 303
        tls_config:
          insecure_skip_verify: false

    # Same as http_2xx, but TLS certificate verification is skipped.
    http_2xx_insecure:
      prober: http
      timeout: 10s
      http:
        method: GET
        preferred_ip_protocol: ip4
        follow_redirects: true
        valid_http_versions:
          - 'HTTP/1.1'
          - 'HTTP/2.0'
        valid_status_codes:
          - 200
          - 301
          - 302
          - 303
        tls_config:
          insecure_skip_verify: true

    # Plain TCP connection check.
    tcp_connect:
      prober: tcp
      timeout: 5s

    # ICMP probe over IPv4.
    icmp:
      prober: icmp
      timeout: 5s
      icmp:
        preferred_ip_protocol: ip4
# ServiceMonitor settings: shared scrape defaults followed by the list of
# probed HTTPS endpoints. Every target below explicitly selects the http_2xx
# module, which is also the configured default module.
serviceMonitor:
enabled: true
# Defaults: 60s scrape interval, 30s scrape timeout, http_2xx module.
defaults:
additionalMetricsRelabels: {}
interval: 60s
scrapeTimeout: 30s
module: http_2xx
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether additionalLabels belongs to `defaults` or to `serviceMonitor`
# itself — confirm the nesting against the chart's values schema.
additionalLabels:
release: prometheus
# Each target has a name, the URL to probe, and the module to probe it with.
targets:
# Infrastructure Services
- name: argocd
url: https://argocd0213.kro.kr
module: http_2xx
- name: grafana
url: https://grafana0213.kro.kr
module: http_2xx
- name: vault
url: https://vault0213.kro.kr
module: http_2xx
- name: authelia
url: https://auth0213.kro.kr
module: http_2xx
- name: karma
url: https://karma0213.kro.kr
module: http_2xx
# NOTE(review): this target is named gitea but its host is github0213.com,
# unlike the *.kro.kr hosts around it — confirm this is intended.
- name: gitea
url: https://github0213.com
module: http_2xx
- name: minio-console
url: https://minio0213.kro.kr
module: http_2xx
- name: velero-ui
url: https://velero0213.kro.kr
module: http_2xx
- name: headlamp
url: https://kubernetes0213.kro.kr
module: http_2xx
- name: goldilocks
url: https://goldilocks0213.kro.kr
module: http_2xx
- name: code-server
url: https://vscode0213.kro.kr
module: http_2xx
- name: pgweb
url: https://pgweb0213.kro.kr
module: http_2xx
- name: zot
url: https://zot0213.kro.kr
module: http_2xx
# User Applications
- name: homer
url: https://mayne.kro.kr
module: http_2xx
- name: portfolio
url: https://minjo0213.kro.kr
module: http_2xx
- name: docusaurus
url: https://docusaurus0213.kro.kr
module: http_2xx
- name: jotion
url: https://jotion0213.kro.kr
module: http_2xx
- name: jovies
url: https://jovies.kro.kr
module: http_2xx
- name: todo
url: https://todo0213.kro.kr
module: http_2xx
- name: umami
url: https://umami0213.kro.kr
module: http_2xx
- name: mas
url: https://mas0213.kro.kr
module: http_2xx
- name: jaejadle
url: https://jaejadle.kro.kr
module: http_2xx
- name: jaejadle-dev
url: https://dev.jaejadle.kro.kr
module: http_2xx
- name: joossam
url: https://joossameng.kro.kr
module: http_2xx
- name: joossam-dev
url: https://dev.joossameng.kro.kr
module: http_2xx
# Alerting rules built on the blackbox probe metrics.
# NOTE(review): the annotations below use Prometheus templating
# ({{ $labels... }}, {{ $value }}). If the chart renders `rules` through
# Helm's `tpl`, these expressions would need escaping — confirm with
# `helm template`.
prometheusRule:
  enabled: true
  additionalLabels:
    release: prometheus
  rules:
    # A probe has reported failure continuously for 5 minutes.
    - alert: BlackboxProbeFailed
      expr: probe_success == 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "Blackbox probe failed for {{ $labels.target }}"
        description: "Probe {{ $labels.instance }} has been failing for more than 5 minutes."

    # The 5-minute average probe duration has stayed above 5 seconds.
    - alert: BlackboxSlowProbe
      expr: avg_over_time(probe_duration_seconds[5m]) > 5
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Blackbox slow probe for {{ $labels.target }}"
        description: "Probe {{ $labels.instance }} took more than 5s to complete."

    # The certificate expires in less than 30 days.
    # The comparison is done in seconds (30 * 86400) rather than dividing the
    # remaining time by 86400: $value must stay in seconds because the
    # description formats it with humanizeDuration, which expects seconds.
    - alert: BlackboxSslCertificateWillExpireSoon
      expr: (probe_ssl_earliest_cert_expiry - time()) < 30 * 86400
      for: 1h
      labels:
        severity: warning
      annotations:
        summary: "SSL certificate will expire soon for {{ $labels.target }}"
        description: "SSL certificate expires in {{ $value | humanizeDuration }} for {{ $labels.instance }}."

    # The certificate's expiry time is already in the past.
    - alert: BlackboxSslCertificateExpired
      expr: (probe_ssl_earliest_cert_expiry - time()) <= 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "SSL certificate has expired for {{ $labels.target }}"
        description: "SSL certificate has expired for {{ $labels.instance }}."
# Soft (preferred, weight 100) pod anti-affinity keyed on the node hostname,
# matching pods labelled app.kubernetes.io/name=prometheus-blackbox-exporter.
affinity:
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          topologyKey: kubernetes.io/hostname
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/name
                operator: In
                values: [prometheus-blackbox-exporter]