Compare commits
21 Commits
2b1667e643
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b145881fa2 | |||
| 7e61af372b | |||
| 3b5bf20902 | |||
| a70403d1ae | |||
| 7cbc0c810e | |||
| 904cc3cab6 | |||
| c1214029a2 | |||
| 4aa7e37f76 | |||
| 4bdcaf8fcd | |||
| 43cf7e9de7 | |||
| 15d5e58d6c | |||
| 7d0c8aa5f3 | |||
| 9c00c42946 | |||
| a08d989fc3 | |||
| 203a8debac | |||
| c128ece672 | |||
| bcf60b2428 | |||
| da89c8dbf0 | |||
| 11f9457236 | |||
| 7e375e20c6 | |||
| b818a8c1fe |
@@ -14,10 +14,10 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/alertmanager/helm-values.yaml
|
- $values/alertmanager/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
path: alertmanager
|
path: alertmanager
|
||||||
kustomize: {}
|
kustomize: {}
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ affinity:
|
|||||||
persistence:
|
persistence:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 15m
|
cpu: 15m
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ spec:
|
|||||||
data:
|
data:
|
||||||
- secretKey: smtp_auth_password
|
- secretKey: smtp_auth_password
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: alertmanager
|
key: observability/alertmanager
|
||||||
property: SMTP_PASSWORD
|
property: SMTP_PASSWORD
|
||||||
---
|
---
|
||||||
apiVersion: external-secrets.io/v1
|
apiVersion: external-secrets.io/v1
|
||||||
@@ -81,5 +81,5 @@ spec:
|
|||||||
data:
|
data:
|
||||||
- secretKey: smtp_password
|
- secretKey: smtp_password
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: alertmanager
|
key: observability/alertmanager
|
||||||
property: SMTP_PASSWORD
|
property: SMTP_PASSWORD
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ spec:
|
|||||||
project: default
|
project: default
|
||||||
|
|
||||||
source:
|
source:
|
||||||
repoURL: https://github.com/K3S-HOME/observability.git
|
repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
path: .
|
path: .
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/blackbox-exporter/helm-values.yaml
|
- $values/blackbox-exporter/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -5,9 +5,10 @@ fullnameOverride: blackbox-exporter
|
|||||||
|
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 23m
|
cpu: 15m
|
||||||
memory: 100Mi
|
memory: 100Mi
|
||||||
limits:
|
limits:
|
||||||
memory: 100Mi
|
memory: 100Mi
|
||||||
|
|||||||
@@ -14,10 +14,10 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/goldilocks/helm-values.yaml
|
- $values/goldilocks/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
path: goldilocks
|
path: goldilocks
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ dashboard:
|
|||||||
enabled: true
|
enabled: true
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 15m
|
cpu: 15m
|
||||||
@@ -49,6 +50,7 @@ controller:
|
|||||||
enabled: true
|
enabled: true
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 15m
|
cpu: 15m
|
||||||
@@ -60,15 +62,6 @@ controller:
|
|||||||
# Set to false to only monitor namespaces with the label: goldilocks.fairwinds.com/enabled=true
|
# Set to false to only monitor namespaces with the label: goldilocks.fairwinds.com/enabled=true
|
||||||
enableCostRecommendations: true
|
enableCostRecommendations: true
|
||||||
|
|
||||||
# Schedule on control-plane node
|
|
||||||
nodeSelector:
|
|
||||||
node-role.kubernetes.io/control-plane: "true"
|
|
||||||
|
|
||||||
tolerations:
|
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
# VPA configuration (should already be installed)
|
# VPA configuration (should already be installed)
|
||||||
vpa:
|
vpa:
|
||||||
# Set to false since we're installing VPA separately
|
# Set to false since we're installing VPA separately
|
||||||
|
|||||||
@@ -14,10 +14,10 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/grafana/helm-values.yaml
|
- $values/grafana/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
path: grafana
|
path: grafana
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
2629
grafana/dashboards/APM.json
Normal file
2629
grafana/dashboards/APM.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1987,14 +1987,14 @@
|
|||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"exemplar": true,
|
"exemplar": true,
|
||||||
"expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{image!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace) > 0 or vector(0)",
|
"expr": "sum(rate(container_pressure_cpu_waiting_seconds_total{pod!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (namespace) > 0.1",
|
||||||
"interval": "$resolution",
|
"interval": "$resolution",
|
||||||
"legendFormat": "{{ namespace }}",
|
"legendFormat": "{{ namespace }}",
|
||||||
"range": true,
|
"range": true,
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "CPU Throttled seconds by namespace",
|
"title": "CPU Pressure (waiting) by namespace",
|
||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -2099,14 +2099,14 @@
|
|||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"exemplar": true,
|
"exemplar": true,
|
||||||
"expr": "sum(rate(node_cpu_core_throttles_total{cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance) or vector(0)",
|
"expr": "sum(rate(node_pressure_cpu_waiting_seconds_total{cluster=\"$cluster\", job=\"$job\"}[$__rate_interval])) by (instance) > 0.1",
|
||||||
"interval": "$resolution",
|
"interval": "$resolution",
|
||||||
"legendFormat": "{{ instance }}",
|
"legendFormat": "{{ instance }}",
|
||||||
"range": true,
|
"range": true,
|
||||||
"refId": "A"
|
"refId": "A"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"title": "CPU Core Throttled by instance",
|
"title": "Node CPU Pressure (waiting) by instance",
|
||||||
"type": "timeseries"
|
"type": "timeseries"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -2242,8 +2242,9 @@
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"unit": "s",
|
"unit": "percent",
|
||||||
"unitScale": true
|
"min": 0,
|
||||||
|
"max": 100
|
||||||
},
|
},
|
||||||
"overrides": []
|
"overrides": []
|
||||||
},
|
},
|
||||||
@@ -2278,7 +2279,7 @@
|
|||||||
},
|
},
|
||||||
"editorMode": "code",
|
"editorMode": "code",
|
||||||
"exemplar": true,
|
"exemplar": true,
|
||||||
"expr": "rate(minio_node_process_cpu_total_seconds{job=~\"$scrape_jobs\"}[5m])",
|
"expr": "rate(minio_node_process_cpu_total_seconds{job=\"minio\"}[5m]) * 100",
|
||||||
"interval": "",
|
"interval": "",
|
||||||
"legendFormat": "{{server}}",
|
"legendFormat": "{{server}}",
|
||||||
"range": true,
|
"range": true,
|
||||||
|
|||||||
@@ -39,12 +39,13 @@ podSecurityContext:
|
|||||||
fsGroup: 472
|
fsGroup: 472
|
||||||
fsGroupChangePolicy: "Always"
|
fsGroupChangePolicy: "Always"
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 23m
|
cpu: 22m
|
||||||
memory: 175Mi
|
memory: 144Mi
|
||||||
limits:
|
limits:
|
||||||
memory: 175Mi
|
memory: 242Mi
|
||||||
|
|
||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
@@ -80,6 +81,11 @@ datasources:
|
|||||||
editable: true
|
editable: true
|
||||||
jsonData:
|
jsonData:
|
||||||
implementation: prometheus
|
implementation: prometheus
|
||||||
|
- name: Tempo
|
||||||
|
type: tempo
|
||||||
|
access: proxy
|
||||||
|
url: http://tempo.tempo.svc.cluster.local:3100
|
||||||
|
editable: true
|
||||||
|
|
||||||
# Dashboards are manually imported via Grafana UI
|
# Dashboards are manually imported via Grafana UI
|
||||||
# JSON files stored in dashboards/ directory for reference
|
# JSON files stored in dashboards/ directory for reference
|
||||||
|
|||||||
@@ -14,5 +14,5 @@ spec:
|
|||||||
data:
|
data:
|
||||||
- secretKey: password
|
- secretKey: password
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: postgresql
|
key: storage/postgresql
|
||||||
property: PASSWORD
|
property: PASSWORD
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/kube-state-metrics/helm-values.yaml
|
- $values/kube-state-metrics/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -7,21 +7,13 @@ fullnameOverride: kube-state-metrics
|
|||||||
# Note: kube-state-metrics는 stateless이지만, 여러 replica는 동일한 메트릭을 중복 생성하므로
|
# Note: kube-state-metrics는 stateless이지만, 여러 replica는 동일한 메트릭을 중복 생성하므로
|
||||||
# 단일 replica로 실행하는 것이 권장됩니다.
|
# 단일 replica로 실행하는 것이 권장됩니다.
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 15m
|
cpu: 15m
|
||||||
memory: 105Mi
|
memory: 100Mi
|
||||||
limits:
|
limits:
|
||||||
memory: 105Mi
|
memory: 100Mi
|
||||||
|
|
||||||
# Schedule on control-plane node
|
|
||||||
nodeSelector:
|
|
||||||
node-role.kubernetes.io/control-plane: "true"
|
|
||||||
|
|
||||||
tolerations:
|
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
|
|||||||
@@ -12,8 +12,7 @@ resources:
|
|||||||
- loki/argocd.yaml
|
- loki/argocd.yaml
|
||||||
# promtail removed - OTel filelog receiver handles log collection
|
# promtail removed - OTel filelog receiver handles log collection
|
||||||
- tempo/argocd.yaml
|
- tempo/argocd.yaml
|
||||||
- opentelemetry-operator/argocd.yaml
|
- opentelemetry/argocd.yaml
|
||||||
- opentelemetry-collector/argocd.yaml
|
|
||||||
- node-exporter/argocd.yaml
|
- node-exporter/argocd.yaml
|
||||||
- kube-state-metrics/argocd.yaml
|
- kube-state-metrics/argocd.yaml
|
||||||
- goldilocks/argocd.yaml
|
- goldilocks/argocd.yaml
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/loki/helm-values.yaml
|
- $values/loki/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -60,12 +60,13 @@ singleBinary:
|
|||||||
mountPath: /var/loki
|
mountPath: /var/loki
|
||||||
# Medium priority for observability
|
# Medium priority for observability
|
||||||
priorityClassName: medium-priority
|
priorityClassName: medium-priority
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 63m
|
cpu: 10m
|
||||||
memory: 363Mi
|
memory: 225Mi
|
||||||
limits:
|
limits:
|
||||||
memory: 363Mi
|
memory: 323Mi
|
||||||
|
|
||||||
# Disable components not needed in single binary mode
|
# Disable components not needed in single binary mode
|
||||||
backend:
|
backend:
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/node-exporter/helm-values.yaml
|
- $values/node-exporter/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ fullnameOverride: node-exporter
|
|||||||
hostNetwork: true
|
hostNetwork: true
|
||||||
hostPID: true
|
hostPID: true
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 15m
|
cpu: 15m
|
||||||
|
|||||||
@@ -1,38 +0,0 @@
|
|||||||
apiVersion: argoproj.io/v1alpha1
|
|
||||||
kind: Application
|
|
||||||
metadata:
|
|
||||||
name: opentelemetry-collector
|
|
||||||
namespace: argocd
|
|
||||||
finalizers:
|
|
||||||
- resources-finalizer.argocd.argoproj.io
|
|
||||||
annotations:
|
|
||||||
argocd.argoproj.io/sync-wave: "1"
|
|
||||||
spec:
|
|
||||||
project: default
|
|
||||||
source:
|
|
||||||
repoURL: https://github.com/K3S-HOME/observability.git
|
|
||||||
targetRevision: main
|
|
||||||
path: opentelemetry-collector/manifests
|
|
||||||
destination:
|
|
||||||
server: https://kubernetes.default.svc
|
|
||||||
namespace: opentelemetry
|
|
||||||
syncPolicy:
|
|
||||||
automated:
|
|
||||||
prune: true
|
|
||||||
selfHeal: true
|
|
||||||
allowEmpty: false
|
|
||||||
syncOptions:
|
|
||||||
- CreateNamespace=true
|
|
||||||
- PrunePropagationPolicy=foreground
|
|
||||||
- PruneLast=true
|
|
||||||
- ServerSideApply=true
|
|
||||||
retry:
|
|
||||||
limit: 5
|
|
||||||
backoff:
|
|
||||||
duration: 5s
|
|
||||||
factor: 2
|
|
||||||
maxDuration: 3m
|
|
||||||
managedNamespaceMetadata:
|
|
||||||
labels:
|
|
||||||
goldilocks.fairwinds.com/enabled: 'true'
|
|
||||||
revisionHistoryLimit: 10
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- argocd.yaml
|
|
||||||
@@ -1,238 +0,0 @@
|
|||||||
# OpenTelemetry Collector with Target Allocator
|
|
||||||
# Managed by OpenTelemetry Operator
|
|
||||||
#
|
|
||||||
# Architecture:
|
|
||||||
# - DaemonSet mode: one collector per node for log collection
|
|
||||||
# - Target Allocator: distributes scrape targets across collectors
|
|
||||||
# - Filelog receiver for container logs
|
|
||||||
# - Prometheus receiver with Target Allocator for metrics
|
|
||||||
# - Exports to: Tempo (traces), Prometheus (metrics), Loki (logs)
|
|
||||||
apiVersion: opentelemetry.io/v1beta1
|
|
||||||
kind: OpenTelemetryCollector
|
|
||||||
metadata:
|
|
||||||
name: otel-collector
|
|
||||||
namespace: opentelemetry
|
|
||||||
spec:
|
|
||||||
mode: daemonset
|
|
||||||
image: otel/opentelemetry-collector-contrib:0.113.0
|
|
||||||
serviceAccount: otel-collector
|
|
||||||
|
|
||||||
# Target Allocator disabled - metrics collected by Prometheus directly
|
|
||||||
# OTel handles logs (filelog) and traces (otlp) only
|
|
||||||
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 50m
|
|
||||||
memory: 512Mi
|
|
||||||
limits:
|
|
||||||
memory: 512Mi
|
|
||||||
|
|
||||||
tolerations:
|
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
volumeMounts:
|
|
||||||
- name: varlogpods
|
|
||||||
mountPath: /var/log/pods
|
|
||||||
readOnly: true
|
|
||||||
- name: varlibdockercontainers
|
|
||||||
mountPath: /var/lib/docker/containers
|
|
||||||
readOnly: true
|
|
||||||
|
|
||||||
volumes:
|
|
||||||
- name: varlogpods
|
|
||||||
hostPath:
|
|
||||||
path: /var/log/pods
|
|
||||||
- name: varlibdockercontainers
|
|
||||||
hostPath:
|
|
||||||
path: /var/lib/docker/containers
|
|
||||||
|
|
||||||
ports:
|
|
||||||
- name: otlp-grpc
|
|
||||||
port: 4317
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: 4317
|
|
||||||
- name: otlp-http
|
|
||||||
port: 4318
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: 4318
|
|
||||||
- name: metrics
|
|
||||||
port: 8888
|
|
||||||
protocol: TCP
|
|
||||||
targetPort: 8888
|
|
||||||
|
|
||||||
env:
|
|
||||||
- name: K8S_NODE_NAME
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: spec.nodeName
|
|
||||||
- name: K8S_POD_NAME
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: metadata.name
|
|
||||||
- name: K8S_POD_IP
|
|
||||||
valueFrom:
|
|
||||||
fieldRef:
|
|
||||||
fieldPath: status.podIP
|
|
||||||
|
|
||||||
config:
|
|
||||||
receivers:
|
|
||||||
otlp:
|
|
||||||
protocols:
|
|
||||||
grpc:
|
|
||||||
endpoint: 0.0.0.0:4317
|
|
||||||
http:
|
|
||||||
endpoint: 0.0.0.0:4318
|
|
||||||
|
|
||||||
# Filelog receiver for container logs
|
|
||||||
filelog:
|
|
||||||
include:
|
|
||||||
- /var/log/pods/*/*/*.log
|
|
||||||
exclude:
|
|
||||||
- /var/log/pods/opentelemetry_otel-collector*/*/*.log
|
|
||||||
start_at: end
|
|
||||||
include_file_path: true
|
|
||||||
include_file_name: false
|
|
||||||
operators:
|
|
||||||
- type: router
|
|
||||||
id: get-format
|
|
||||||
routes:
|
|
||||||
- output: parser-docker
|
|
||||||
expr: 'body matches "^\\{"'
|
|
||||||
- output: parser-containerd
|
|
||||||
expr: 'body matches "^[^ Z]+Z"'
|
|
||||||
default: parser-containerd
|
|
||||||
|
|
||||||
- type: json_parser
|
|
||||||
id: parser-docker
|
|
||||||
output: extract-metadata-from-filepath
|
|
||||||
timestamp:
|
|
||||||
parse_from: attributes.time
|
|
||||||
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
|
||||||
|
|
||||||
- type: regex_parser
|
|
||||||
id: parser-containerd
|
|
||||||
regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
|
|
||||||
output: extract-metadata-from-filepath
|
|
||||||
timestamp:
|
|
||||||
parse_from: attributes.time
|
|
||||||
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
|
||||||
|
|
||||||
- type: regex_parser
|
|
||||||
id: extract-metadata-from-filepath
|
|
||||||
regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9-]+)\/(?P<container_name>[^\/]+)\/.*$'
|
|
||||||
parse_from: attributes["log.file.path"]
|
|
||||||
|
|
||||||
- type: move
|
|
||||||
from: attributes.namespace
|
|
||||||
to: resource["k8s.namespace.name"]
|
|
||||||
- type: move
|
|
||||||
from: attributes.pod_name
|
|
||||||
to: resource["k8s.pod.name"]
|
|
||||||
- type: move
|
|
||||||
from: attributes.container_name
|
|
||||||
to: resource["k8s.container.name"]
|
|
||||||
- type: move
|
|
||||||
from: attributes.uid
|
|
||||||
to: resource["k8s.pod.uid"]
|
|
||||||
- type: move
|
|
||||||
from: attributes.stream
|
|
||||||
to: attributes["log.iostream"]
|
|
||||||
- type: move
|
|
||||||
from: attributes.log
|
|
||||||
to: body
|
|
||||||
# Loki label hints - tell Loki exporter which attributes to use as labels
|
|
||||||
- type: add
|
|
||||||
field: resource["loki.resource.labels"]
|
|
||||||
value: "k8s.namespace.name, k8s.pod.name, k8s.container.name, k8s.node.name"
|
|
||||||
- type: add
|
|
||||||
field: attributes["loki.attribute.labels"]
|
|
||||||
value: "log.iostream"
|
|
||||||
|
|
||||||
# Prometheus receiver - self metrics only
|
|
||||||
prometheus:
|
|
||||||
config:
|
|
||||||
scrape_configs:
|
|
||||||
- job_name: otel-collector
|
|
||||||
scrape_interval: 60s
|
|
||||||
static_configs:
|
|
||||||
- targets: ['${env:K8S_POD_IP}:8888']
|
|
||||||
|
|
||||||
processors:
|
|
||||||
batch:
|
|
||||||
timeout: 10s
|
|
||||||
send_batch_size: 1024
|
|
||||||
send_batch_max_size: 2048
|
|
||||||
|
|
||||||
memory_limiter:
|
|
||||||
check_interval: 5s
|
|
||||||
limit_mib: 400
|
|
||||||
spike_limit_mib: 100
|
|
||||||
|
|
||||||
k8sattributes:
|
|
||||||
extract:
|
|
||||||
metadata:
|
|
||||||
- k8s.namespace.name
|
|
||||||
- k8s.deployment.name
|
|
||||||
- k8s.pod.name
|
|
||||||
- k8s.node.name
|
|
||||||
passthrough: false
|
|
||||||
pod_association:
|
|
||||||
- sources:
|
|
||||||
- from: resource_attribute
|
|
||||||
name: k8s.pod.ip
|
|
||||||
- sources:
|
|
||||||
- from: resource_attribute
|
|
||||||
name: k8s.pod.uid
|
|
||||||
- sources:
|
|
||||||
- from: connection
|
|
||||||
|
|
||||||
resourcedetection:
|
|
||||||
detectors: [env, system]
|
|
||||||
timeout: 5s
|
|
||||||
override: false
|
|
||||||
|
|
||||||
exporters:
|
|
||||||
otlp/tempo:
|
|
||||||
endpoint: tempo.tempo.svc.cluster.local:4317
|
|
||||||
tls:
|
|
||||||
insecure: true
|
|
||||||
|
|
||||||
prometheusremotewrite:
|
|
||||||
endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
|
|
||||||
tls:
|
|
||||||
insecure: true
|
|
||||||
external_labels:
|
|
||||||
otel_collector: ${env:K8S_POD_NAME}
|
|
||||||
|
|
||||||
loki:
|
|
||||||
endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push
|
|
||||||
default_labels_enabled:
|
|
||||||
exporter: false
|
|
||||||
level: true
|
|
||||||
|
|
||||||
debug:
|
|
||||||
verbosity: basic
|
|
||||||
|
|
||||||
extensions:
|
|
||||||
health_check:
|
|
||||||
endpoint: 0.0.0.0:13133
|
|
||||||
|
|
||||||
service:
|
|
||||||
extensions: [health_check]
|
|
||||||
pipelines:
|
|
||||||
traces:
|
|
||||||
receivers: [otlp]
|
|
||||||
processors: [memory_limiter, k8sattributes, resourcedetection, batch]
|
|
||||||
exporters: [otlp/tempo]
|
|
||||||
|
|
||||||
metrics:
|
|
||||||
receivers: [otlp, prometheus]
|
|
||||||
processors: [memory_limiter, k8sattributes, resourcedetection, batch]
|
|
||||||
exporters: [prometheusremotewrite]
|
|
||||||
|
|
||||||
logs:
|
|
||||||
receivers: [otlp, filelog]
|
|
||||||
processors: [memory_limiter, k8sattributes, resourcedetection, batch]
|
|
||||||
exporters: [loki]
|
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- rbac.yaml
|
|
||||||
- collector.yaml
|
|
||||||
@@ -1,85 +0,0 @@
|
|||||||
# RBAC for OpenTelemetry Collector and Target Allocator
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: otel-collector
|
|
||||||
namespace: opentelemetry
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: otel-collector
|
|
||||||
rules:
|
|
||||||
# For k8sattributes processor
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["pods", "namespaces", "nodes", "endpoints", "services"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
- apiGroups: ["apps"]
|
|
||||||
resources: ["replicasets", "deployments", "statefulsets", "daemonsets"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
- apiGroups: ["discovery.k8s.io"]
|
|
||||||
resources: ["endpointslices"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
# For Target Allocator - ServiceMonitor/PodMonitor discovery
|
|
||||||
- apiGroups: ["monitoring.coreos.com"]
|
|
||||||
resources: ["servicemonitors", "podmonitors"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
# For node metrics
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["nodes/metrics", "nodes/stats", "nodes/proxy"]
|
|
||||||
verbs: ["get"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: otel-collector
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: otel-collector
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: otel-collector
|
|
||||||
namespace: opentelemetry
|
|
||||||
---
|
|
||||||
# Target Allocator ServiceAccount and RBAC
|
|
||||||
apiVersion: v1
|
|
||||||
kind: ServiceAccount
|
|
||||||
metadata:
|
|
||||||
name: otel-collector-targetallocator
|
|
||||||
namespace: opentelemetry
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRole
|
|
||||||
metadata:
|
|
||||||
name: otel-targetallocator
|
|
||||||
rules:
|
|
||||||
# Core resources for service discovery
|
|
||||||
- apiGroups: [""]
|
|
||||||
resources: ["pods", "nodes", "services", "endpoints", "namespaces"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
- apiGroups: ["discovery.k8s.io"]
|
|
||||||
resources: ["endpointslices"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
# Prometheus CRs
|
|
||||||
- apiGroups: ["monitoring.coreos.com"]
|
|
||||||
resources: ["servicemonitors", "podmonitors", "probes", "scrapeconfigs"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
# For allocator coordination
|
|
||||||
- apiGroups: ["opentelemetry.io"]
|
|
||||||
resources: ["opentelemetrycollectors"]
|
|
||||||
verbs: ["get", "watch", "list"]
|
|
||||||
---
|
|
||||||
apiVersion: rbac.authorization.k8s.io/v1
|
|
||||||
kind: ClusterRoleBinding
|
|
||||||
metadata:
|
|
||||||
name: otel-targetallocator
|
|
||||||
roleRef:
|
|
||||||
apiGroup: rbac.authorization.k8s.io
|
|
||||||
kind: ClusterRole
|
|
||||||
name: otel-targetallocator
|
|
||||||
subjects:
|
|
||||||
- kind: ServiceAccount
|
|
||||||
name: otel-collector-targetallocator
|
|
||||||
namespace: opentelemetry
|
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
# OpenTelemetry Operator Helm Values
|
|
||||||
# Chart: https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-operator
|
|
||||||
|
|
||||||
# Manager (Operator) configuration
|
|
||||||
manager:
|
|
||||||
collectorImage:
|
|
||||||
repository: otel/opentelemetry-collector-contrib
|
|
||||||
targetAllocatorImage:
|
|
||||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/target-allocator
|
|
||||||
autoInstrumentationImage:
|
|
||||||
java:
|
|
||||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java
|
|
||||||
nodejs:
|
|
||||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-nodejs
|
|
||||||
python:
|
|
||||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python
|
|
||||||
dotnet:
|
|
||||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-dotnet
|
|
||||||
go:
|
|
||||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-go
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: null # Disable chart default CPU limits
|
|
||||||
memory: 256Mi
|
|
||||||
requests:
|
|
||||||
cpu: 10m
|
|
||||||
memory: 256Mi
|
|
||||||
# ServiceMonitor configuration
|
|
||||||
serviceMonitor:
|
|
||||||
enabled: false # Disable ServiceMonitor creation to prevent conflicts
|
|
||||||
|
|
||||||
# Admission webhooks (uses cert-manager self-signed CA)
|
|
||||||
admissionWebhooks:
|
|
||||||
certManager:
|
|
||||||
enabled: true
|
|
||||||
|
|
||||||
# Kube RBAC Proxy
|
|
||||||
kubeRBACProxy:
|
|
||||||
enabled: true
|
|
||||||
resources:
|
|
||||||
limits:
|
|
||||||
cpu: null # Disable chart default CPU limits
|
|
||||||
memory: 64Mi
|
|
||||||
requests:
|
|
||||||
cpu: 5m
|
|
||||||
memory: 64Mi
|
|
||||||
|
|
||||||
# Schedule on master node
|
|
||||||
tolerations:
|
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
nodeSelector:
|
|
||||||
node-role.kubernetes.io/control-plane: "true"
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
|
||||||
kind: Kustomization
|
|
||||||
|
|
||||||
resources:
|
|
||||||
- argocd.yaml
|
|
||||||
@@ -1,27 +1,27 @@
|
|||||||
apiVersion: argoproj.io/v1alpha1
|
apiVersion: argoproj.io/v1alpha1
|
||||||
kind: Application
|
kind: Application
|
||||||
metadata:
|
metadata:
|
||||||
name: opentelemetry-operator
|
name: opentelemetry
|
||||||
namespace: argocd
|
namespace: argocd
|
||||||
finalizers:
|
finalizers:
|
||||||
- resources-finalizer.argocd.argoproj.io
|
- resources-finalizer.argocd.argoproj.io
|
||||||
annotations:
|
annotations:
|
||||||
argocd.argoproj.io/sync-wave: "0"
|
argocd.argoproj.io/sync-wave: "1"
|
||||||
spec:
|
spec:
|
||||||
project: default
|
project: default
|
||||||
sources:
|
sources:
|
||||||
- repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts
|
- repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts
|
||||||
chart: opentelemetry-operator
|
chart: opentelemetry-collector
|
||||||
targetRevision: 0.74.0
|
targetRevision: 0.108.0
|
||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/opentelemetry-operator/helm-values.yaml
|
- $values/opentelemetry/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
server: https://kubernetes.default.svc
|
server: https://kubernetes.default.svc
|
||||||
namespace: opentelemetry-operator
|
namespace: opentelemetry
|
||||||
syncPolicy:
|
syncPolicy:
|
||||||
automated:
|
automated:
|
||||||
prune: true
|
prune: true
|
||||||
@@ -11,6 +11,11 @@
|
|||||||
# Pipeline:
|
# Pipeline:
|
||||||
# Applications → OTel Collector → Tempo/Prometheus/Loki → Grafana
|
# Applications → OTel Collector → Tempo/Prometheus/Loki → Grafana
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Name Override
|
||||||
|
# =============================================================================
|
||||||
|
fullnameOverride: otel-collector
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Image Configuration
|
# Image Configuration
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
@@ -23,22 +28,31 @@ image:
|
|||||||
mode: daemonset
|
mode: daemonset
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Resource Limits (increased for log + metrics collection)
|
# Resource Limits (no CPU limit for stability, mem limit capped at 1024Mi)
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 50m
|
cpu: 34m
|
||||||
memory: 512Mi
|
memory: 142Mi
|
||||||
limits:
|
limits:
|
||||||
memory: 512Mi
|
memory: 1024Mi
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Tolerations (run on all nodes including master)
|
# Environment Variables
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
tolerations:
|
extraEnvs:
|
||||||
- key: node-role.kubernetes.io/control-plane
|
- name: K8S_NODE_NAME
|
||||||
operator: Exists
|
valueFrom:
|
||||||
effect: NoSchedule
|
fieldRef:
|
||||||
|
fieldPath: spec.nodeName
|
||||||
|
- name: K8S_POD_NAME
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: metadata.name
|
||||||
|
- name: K8S_POD_IP
|
||||||
|
valueFrom:
|
||||||
|
fieldRef:
|
||||||
|
fieldPath: status.podIP
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Extra Volumes for Log Collection
|
# Extra Volumes for Log Collection
|
||||||
@@ -160,6 +174,13 @@ config:
|
|||||||
- type: move
|
- type: move
|
||||||
from: attributes.log
|
from: attributes.log
|
||||||
to: body
|
to: body
|
||||||
|
# Loki label hints - tell Loki exporter which attributes to use as labels
|
||||||
|
- type: add
|
||||||
|
field: resource["loki.resource.labels"]
|
||||||
|
value: "k8s.namespace.name, k8s.pod.name, k8s.container.name, k8s.node.name"
|
||||||
|
- type: add
|
||||||
|
field: attributes["loki.attribute.labels"]
|
||||||
|
value: "log.iostream"
|
||||||
|
|
||||||
# Prometheus receiver - self metrics only
|
# Prometheus receiver - self metrics only
|
||||||
# Infrastructure metrics (node-exporter, kube-state-metrics) handled by Prometheus
|
# Infrastructure metrics (node-exporter, kube-state-metrics) handled by Prometheus
|
||||||
@@ -168,9 +189,9 @@ config:
|
|||||||
scrape_configs:
|
scrape_configs:
|
||||||
# OTel Collector self metrics only
|
# OTel Collector self metrics only
|
||||||
- job_name: 'otel-collector'
|
- job_name: 'otel-collector'
|
||||||
scrape_interval: 30s
|
scrape_interval: 60s
|
||||||
static_configs:
|
static_configs:
|
||||||
- targets: ['${env:MY_POD_IP}:8888']
|
- targets: ['${env:K8S_POD_IP}:8888']
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Processors - how data is transformed
|
# Processors - how data is transformed
|
||||||
@@ -228,12 +249,14 @@ config:
|
|||||||
endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
|
endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
|
||||||
tls:
|
tls:
|
||||||
insecure: true
|
insecure: true
|
||||||
|
external_labels:
|
||||||
|
otel_collector: ${env:K8S_POD_NAME}
|
||||||
|
|
||||||
# Loki for logs
|
# Loki for logs
|
||||||
loki:
|
loki:
|
||||||
endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push
|
endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push
|
||||||
default_labels_enabled:
|
default_labels_enabled:
|
||||||
exporter: true
|
exporter: false
|
||||||
level: true
|
level: true
|
||||||
|
|
||||||
# Debug exporter (for troubleshooting)
|
# Debug exporter (for troubleshooting)
|
||||||
@@ -14,10 +14,10 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/prometheus/helm-values.yaml
|
- $values/prometheus/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
path: prometheus
|
path: prometheus
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -14,6 +14,13 @@ prometheusOperator:
|
|||||||
enabled: true
|
enabled: true
|
||||||
# CRD 생성 비활성화
|
# CRD 생성 비활성화
|
||||||
createCustomResource: false
|
createCustomResource: false
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 15m
|
||||||
|
memory: 100Mi
|
||||||
|
limits:
|
||||||
|
memory: 100Mi
|
||||||
|
|
||||||
# Kubelet ServiceMonitor with cluster label
|
# Kubelet ServiceMonitor with cluster label
|
||||||
kubelet:
|
kubelet:
|
||||||
@@ -105,9 +112,9 @@ prometheus:
|
|||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 50m
|
cpu: 50m
|
||||||
memory: 768Mi
|
memory: 1Gi
|
||||||
limits:
|
limits:
|
||||||
memory: 768Mi
|
memory: 1Gi
|
||||||
|
|
||||||
# ServiceMonitor selector - scrape all ServiceMonitors
|
# ServiceMonitor selector - scrape all ServiceMonitors
|
||||||
serviceMonitorSelectorNilUsesHelmValues: false
|
serviceMonitorSelectorNilUsesHelmValues: false
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ spec:
|
|||||||
data:
|
data:
|
||||||
- secretKey: password
|
- secretKey: password
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: postgresql
|
key: storage/postgresql
|
||||||
property: PASSWORD
|
property: PASSWORD
|
||||||
---
|
---
|
||||||
apiVersion: external-secrets.io/v1
|
apiVersion: external-secrets.io/v1
|
||||||
@@ -43,14 +43,14 @@ spec:
|
|||||||
data:
|
data:
|
||||||
- secretKey: access_key
|
- secretKey: access_key
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: minio
|
key: storage/minio
|
||||||
property: ROOT_USER
|
property: ROOT_USER
|
||||||
conversionStrategy: Default
|
conversionStrategy: Default
|
||||||
decodingStrategy: None
|
decodingStrategy: None
|
||||||
metadataPolicy: None
|
metadataPolicy: None
|
||||||
- secretKey: secret_key
|
- secretKey: secret_key
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: minio
|
key: storage/minio
|
||||||
property: ROOT_PASSWORD
|
property: ROOT_PASSWORD
|
||||||
conversionStrategy: Default
|
conversionStrategy: Default
|
||||||
decodingStrategy: None
|
decodingStrategy: None
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/promtail/helm-values.yaml
|
- $values/promtail/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -37,12 +37,6 @@ resources:
|
|||||||
limits:
|
limits:
|
||||||
memory: 182Mi
|
memory: 182Mi
|
||||||
|
|
||||||
# Tolerations to run on all nodes including control-plane
|
|
||||||
tolerations:
|
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
# ServiceMonitor disabled
|
# ServiceMonitor disabled
|
||||||
serviceMonitor:
|
serviceMonitor:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/tempo/helm-values.yaml
|
- $values/tempo/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -13,20 +13,17 @@ priorityClassName: medium-priority
|
|||||||
# Single replica
|
# Single replica
|
||||||
replicas: 1
|
replicas: 1
|
||||||
|
|
||||||
# =============================================================================
|
|
||||||
# Resource Limits (optimized for small cluster)
|
|
||||||
# =============================================================================
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
cpu: 15m
|
|
||||||
memory: 100Mi
|
|
||||||
limits:
|
|
||||||
memory: 100Mi
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Tempo Configuration
|
# Tempo Configuration
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
tempo:
|
tempo:
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
cpu: 15m
|
||||||
|
memory: 100Mi
|
||||||
|
limits:
|
||||||
|
memory: 109Mi
|
||||||
# Receivers - protocols Tempo accepts
|
# Receivers - protocols Tempo accepts
|
||||||
receivers:
|
receivers:
|
||||||
otlp:
|
otlp:
|
||||||
|
|||||||
@@ -14,10 +14,10 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/thanos/helm-values.yaml
|
- $values/thanos/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
path: thanos/manifests
|
path: thanos/manifests
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -46,12 +46,13 @@ query:
|
|||||||
- --query.replica-label=prometheus_replica
|
- --query.replica-label=prometheus_replica
|
||||||
- --query.auto-downsampling
|
- --query.auto-downsampling
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 15m
|
cpu: 15m
|
||||||
memory: 283Mi
|
memory: 100Mi
|
||||||
limits:
|
limits:
|
||||||
memory: 283Mi
|
memory: 126Mi
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Query Frontend - Caching layer for Query (optional, disabled for small cluster)
|
# Query Frontend - Caching layer for Query (optional, disabled for small cluster)
|
||||||
|
|||||||
@@ -24,9 +24,9 @@ spec:
|
|||||||
data:
|
data:
|
||||||
- secretKey: access_key
|
- secretKey: access_key
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: minio
|
key: storage/minio
|
||||||
property: ROOT_USER
|
property: ROOT_USER
|
||||||
- secretKey: secret_key
|
- secretKey: secret_key
|
||||||
remoteRef:
|
remoteRef:
|
||||||
key: minio
|
key: storage/minio
|
||||||
property: ROOT_PASSWORD
|
property: ROOT_PASSWORD
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ spec:
|
|||||||
helm:
|
helm:
|
||||||
valueFiles:
|
valueFiles:
|
||||||
- $values/vpa/helm-values.yaml
|
- $values/vpa/helm-values.yaml
|
||||||
- repoURL: https://github.com/K3S-HOME/observability.git
|
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||||
targetRevision: main
|
targetRevision: main
|
||||||
ref: values
|
ref: values
|
||||||
destination:
|
destination:
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ recommender:
|
|||||||
enabled: true
|
enabled: true
|
||||||
replicaCount: 1
|
replicaCount: 1
|
||||||
|
|
||||||
|
# Resource settings (no CPU limit for stability)
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 15m
|
cpu: 15m
|
||||||
@@ -13,15 +14,6 @@ recommender:
|
|||||||
limits:
|
limits:
|
||||||
memory: 100Mi
|
memory: 100Mi
|
||||||
|
|
||||||
# Schedule on control-plane node
|
|
||||||
nodeSelector:
|
|
||||||
node-role.kubernetes.io/control-plane: "true"
|
|
||||||
|
|
||||||
tolerations:
|
|
||||||
- key: node-role.kubernetes.io/control-plane
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
# Updater - applies recommended resource requests to pods
|
# Updater - applies recommended resource requests to pods
|
||||||
# Disabled because we're using updateMode: Off (recommendations only)
|
# Disabled because we're using updateMode: Off (recommendations only)
|
||||||
updater:
|
updater:
|
||||||
|
|||||||
Reference in New Issue
Block a user