FEAT(blackbox-exporter): add external endpoint monitoring
- Add blackbox-exporter with prometheus-community Helm chart
- Configure HTTP probes for 25 external endpoints
- Include SSL certificate expiry alerting rules
- Add probe failure and slow response alerts
- Deploy 2 replicas with anti-affinity for HA
This commit is contained in:
186
blackbox-exporter/helm-values.yaml
Normal file
186
blackbox-exporter/helm-values.yaml
Normal file
@@ -0,0 +1,186 @@
|
||||
# Prometheus Blackbox Exporter Helm Values
|
||||
# Chart: https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus-blackbox-exporter
|
||||
|
||||
# Fixed name used for the rendered resources.
fullnameOverride: blackbox-exporter

# Number of exporter pods to run.
replicas: 2

# Memory request and limit are identical (64Mi); no CPU limit is set,
# only a small CPU request.
resources:
  limits:
    memory: 64Mi
  requests:
    memory: 64Mi
    cpu: 15m
|
||||
|
||||
# Probe modules made available to the exporter.
config:
  modules:
    # HTTP GET probe over IPv4 with TLS certificate verification enabled.
    http_2xx:
      prober: http
      timeout: 10s
      http:
        method: GET
        preferred_ip_protocol: ip4
        follow_redirects: true
        valid_http_versions:
          - "HTTP/1.1"
          - "HTTP/2.0"
        valid_status_codes:
          - 200
          - 301
          - 302
          - 303
        tls_config:
          insecure_skip_verify: false

    # Same settings as http_2xx, except certificate verification is skipped.
    http_2xx_insecure:
      prober: http
      timeout: 10s
      http:
        method: GET
        preferred_ip_protocol: ip4
        follow_redirects: true
        valid_http_versions:
          - "HTTP/1.1"
          - "HTTP/2.0"
        valid_status_codes:
          - 200
          - 301
          - 302
          - 303
        tls_config:
          insecure_skip_verify: true

    # Plain TCP connection check.
    tcp_connect:
      prober: tcp
      timeout: 5s

    # ICMP check over IPv4.
    # NOTE(review): ICMP probing usually needs extra container privileges
    # (e.g. NET_RAW) - confirm before pointing any target at this module.
    icmp:
      prober: icmp
      timeout: 5s
      icmp:
        preferred_ip_protocol: ip4
|
||||
|
||||
# ServiceMonitor configuration: one probe definition per entry in `targets`.
serviceMonitor:
  enabled: true

  # Values applied to every target unless the target overrides them.
  defaults:
    additionalMetricsRelabels: {}
    interval: 60s
    scrapeTimeout: 30s
    module: http_2xx
    # The chart reads ServiceMonitor labels from `defaults.labels`; the
    # previous `additionalLabels` key is not a ServiceMonitor option in the
    # chart, so the `release: prometheus` label was presumably never applied.
    # NOTE(review): confirm against the values.yaml of the chart version in use.
    labels:
      release: prometheus

  targets:
    # Infrastructure Services
    - name: argocd
      url: https://argocd0213.kro.kr
      module: http_2xx
    - name: grafana
      url: https://grafana0213.kro.kr
      module: http_2xx
    - name: vault
      url: https://vault0213.kro.kr
      module: http_2xx
    - name: authelia
      url: https://auth0213.kro.kr
      module: http_2xx
    - name: karma
      url: https://karma0213.kro.kr
      module: http_2xx
    - name: gitea
      url: https://github0213.com
      module: http_2xx
    - name: minio-console
      url: https://minio0213.kro.kr
      module: http_2xx
    - name: velero-ui
      url: https://velero0213.kro.kr
      module: http_2xx
    - name: headlamp
      url: https://kubernetes0213.kro.kr
      module: http_2xx
    - name: goldilocks
      url: https://goldilocks0213.kro.kr
      module: http_2xx
    - name: code-server
      url: https://vscode0213.kro.kr
      module: http_2xx
    - name: pgweb
      url: https://pgweb0213.kro.kr
      module: http_2xx
    - name: zot
      url: https://zot0213.kro.kr
      module: http_2xx

    # User Applications
    - name: homer
      url: https://mayne.kro.kr
      module: http_2xx
    - name: portfolio
      url: https://minjo0213.kro.kr
      module: http_2xx
    - name: docusaurus
      url: https://docusaurus0213.kro.kr
      module: http_2xx
    - name: jotion
      url: https://jotion0213.kro.kr
      module: http_2xx
    - name: jovies
      url: https://jovies.kro.kr
      module: http_2xx
    - name: todo
      url: https://todo0213.kro.kr
      module: http_2xx
    - name: umami
      url: https://umami0213.kro.kr
      module: http_2xx
    - name: mas
      url: https://mas0213.kro.kr
      module: http_2xx
    - name: jaejadle
      url: https://jaejadle.kro.kr
      module: http_2xx
    - name: jaejadle-dev
      url: https://dev.jaejadle.kro.kr
      module: http_2xx
    - name: joossam
      url: https://joossameng.kro.kr
      module: http_2xx
    - name: joossam-dev
      url: https://dev.joossameng.kro.kr
      module: http_2xx
|
||||
|
||||
# Alerting rules for probe failures, slow probes and TLS certificate expiry.
# NOTE(review): if the chart renders `rules` through Helm's `tpl`, the
# Prometheus placeholders in the annotations below would have to be escaped
# so Helm does not try to evaluate them - confirm against the chart template.
prometheusRule:
  enabled: true
  additionalLabels:
    release: prometheus
  rules:
    # A probe has reported failure continuously for 5 minutes.
    - alert: BlackboxProbeFailed
      expr: probe_success == 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "Blackbox probe failed for {{ $labels.target }}"
        description: "Probe {{ $labels.instance }} has been failing for more than 5 minutes."

    # The 5-minute average probe duration has stayed above 5 seconds.
    - alert: BlackboxSlowProbe
      expr: avg_over_time(probe_duration_seconds[5m]) > 5
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "Blackbox slow probe for {{ $labels.target }}"
        description: "Probe {{ $labels.instance }} took more than 5s to complete."

    # Certificate expires in less than 30 days.
    # The comparison is done in seconds (not days) so that $value is a number
    # of seconds, which is the unit humanizeDuration expects in the
    # description; dividing by 86400 made a 29-day expiry render as "29s".
    - alert: BlackboxSslCertificateWillExpireSoon
      expr: (probe_ssl_earliest_cert_expiry - time()) < 86400 * 30
      for: 1h
      labels:
        severity: warning
      annotations:
        summary: "SSL certificate will expire soon for {{ $labels.target }}"
        description: "SSL certificate expires in {{ $value | humanizeDuration }} for {{ $labels.instance }}."

    # Certificate expiry time is already in the past.
    - alert: BlackboxSslCertificateExpired
      expr: (probe_ssl_earliest_cert_expiry - time()) <= 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "SSL certificate has expired for {{ $labels.target }}"
        description: "SSL certificate has expired for {{ $labels.instance }}."
|
||||
|
||||
# Soft (preferred, not required) anti-affinity keyed on the node hostname,
# so pods matching the selector are spread across nodes when possible.
affinity:
  podAntiAffinity:
    preferredDuringSchedulingIgnoredDuringExecution:
      - weight: 100
        podAffinityTerm:
          topologyKey: kubernetes.io/hostname
          labelSelector:
            matchExpressions:
              # NOTE(review): assumes the chart labels its pods with this
              # app.kubernetes.io/name value - confirm on the rendered pods.
              - key: app.kubernetes.io/name
                operator: In
                values: [prometheus-blackbox-exporter]
|
||||
Reference in New Issue
Block a user