REFACTOR(otel): migrate collector from Operator to Helm
- Remove opentelemetry-operator (no longer needed)
- Convert opentelemetry-collector to direct Helm Chart
- Remove CRD-based manifests (collector.yaml, rbac.yaml)
- Update helm-values.yaml with Loki labels and env vars
- Simplify architecture: Helm -> DaemonSet (no Operator)
This commit is contained in:
@@ -12,7 +12,6 @@ resources:
|
||||
- loki/argocd.yaml
|
||||
# promtail removed - OTel filelog receiver handles log collection
|
||||
- tempo/argocd.yaml
|
||||
- opentelemetry-operator/argocd.yaml
|
||||
- opentelemetry-collector/argocd.yaml
|
||||
- node-exporter/argocd.yaml
|
||||
- kube-state-metrics/argocd.yaml
|
||||
|
||||
@@ -9,10 +9,16 @@ metadata:
|
||||
argocd.argoproj.io/sync-wave: "1"
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||
sources:
|
||||
- repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts
|
||||
chart: opentelemetry-collector
|
||||
targetRevision: 0.108.0
|
||||
helm:
|
||||
valueFiles:
|
||||
- $values/opentelemetry-collector/helm-values.yaml
|
||||
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||
targetRevision: main
|
||||
path: opentelemetry-collector/manifests
|
||||
ref: values
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: opentelemetry
|
||||
|
||||
@@ -32,6 +32,23 @@ resources:
|
||||
limits:
|
||||
memory: 512Mi
|
||||
|
||||
# =============================================================================
|
||||
# Environment Variables
|
||||
# =============================================================================
|
||||
extraEnvs:
|
||||
- name: K8S_NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: K8S_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: K8S_POD_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
|
||||
# =============================================================================
|
||||
# Extra Volumes for Log Collection
|
||||
# =============================================================================
|
||||
@@ -152,6 +169,13 @@ config:
|
||||
- type: move
|
||||
from: attributes.log
|
||||
to: body
|
||||
# Loki label hints - tell Loki exporter which attributes to use as labels
|
||||
- type: add
|
||||
field: resource["loki.resource.labels"]
|
||||
value: "k8s.namespace.name, k8s.pod.name, k8s.container.name, k8s.node.name"
|
||||
- type: add
|
||||
field: attributes["loki.attribute.labels"]
|
||||
value: "log.iostream"
|
||||
|
||||
# Prometheus receiver - self metrics only
|
||||
# Infrastructure metrics (node-exporter, kube-state-metrics) handled by Prometheus
|
||||
@@ -160,9 +184,9 @@ config:
|
||||
scrape_configs:
|
||||
# OTel Collector self metrics only
|
||||
- job_name: 'otel-collector'
|
||||
scrape_interval: 30s
|
||||
scrape_interval: 60s
|
||||
static_configs:
|
||||
- targets: ['${env:MY_POD_IP}:8888']
|
||||
- targets: ['${env:K8S_POD_IP}:8888']
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Processors - how data is transformed
|
||||
@@ -220,12 +244,14 @@ config:
|
||||
endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
|
||||
tls:
|
||||
insecure: true
|
||||
external_labels:
|
||||
otel_collector: ${env:K8S_POD_NAME}
|
||||
|
||||
# Loki for logs
|
||||
loki:
|
||||
endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push
|
||||
default_labels_enabled:
|
||||
exporter: true
|
||||
exporter: false
|
||||
level: true
|
||||
|
||||
# Debug exporter (for troubleshooting)
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- argocd.yaml
|
||||
@@ -1,233 +0,0 @@
|
||||
# OpenTelemetry Collector with Target Allocator
|
||||
# Managed by OpenTelemetry Operator
|
||||
#
|
||||
# Architecture:
|
||||
# - DaemonSet mode: one collector per node for log collection
|
||||
# - Target Allocator: distributes scrape targets across collectors
|
||||
# - Filelog receiver for container logs
|
||||
# - Prometheus receiver with Target Allocator for metrics
|
||||
# - Exports to: Tempo (traces), Prometheus (metrics), Loki (logs)
|
||||
apiVersion: opentelemetry.io/v1beta1
|
||||
kind: OpenTelemetryCollector
|
||||
metadata:
|
||||
name: otel-collector
|
||||
namespace: opentelemetry
|
||||
spec:
|
||||
mode: daemonset
|
||||
image: otel/opentelemetry-collector-contrib:0.113.0
|
||||
serviceAccount: otel-collector
|
||||
|
||||
# Target Allocator disabled - metrics collected by Prometheus directly
|
||||
# OTel handles logs (filelog) and traces (otlp) only
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
memory: 512Mi
|
||||
|
||||
volumeMounts:
|
||||
- name: varlogpods
|
||||
mountPath: /var/log/pods
|
||||
readOnly: true
|
||||
- name: varlibdockercontainers
|
||||
mountPath: /var/lib/docker/containers
|
||||
readOnly: true
|
||||
|
||||
volumes:
|
||||
- name: varlogpods
|
||||
hostPath:
|
||||
path: /var/log/pods
|
||||
- name: varlibdockercontainers
|
||||
hostPath:
|
||||
path: /var/lib/docker/containers
|
||||
|
||||
ports:
|
||||
- name: otlp-grpc
|
||||
port: 4317
|
||||
protocol: TCP
|
||||
targetPort: 4317
|
||||
- name: otlp-http
|
||||
port: 4318
|
||||
protocol: TCP
|
||||
targetPort: 4318
|
||||
- name: metrics
|
||||
port: 8888
|
||||
protocol: TCP
|
||||
targetPort: 8888
|
||||
|
||||
env:
|
||||
- name: K8S_NODE_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
- name: K8S_POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: metadata.name
|
||||
- name: K8S_POD_IP
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: status.podIP
|
||||
|
||||
config:
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
# Filelog receiver for container logs
|
||||
filelog:
|
||||
include:
|
||||
- /var/log/pods/*/*/*.log
|
||||
exclude:
|
||||
- /var/log/pods/opentelemetry_otel-collector*/*/*.log
|
||||
start_at: end
|
||||
include_file_path: true
|
||||
include_file_name: false
|
||||
operators:
|
||||
- type: router
|
||||
id: get-format
|
||||
routes:
|
||||
- output: parser-docker
|
||||
expr: 'body matches "^\\{"'
|
||||
- output: parser-containerd
|
||||
expr: 'body matches "^[^ Z]+Z"'
|
||||
default: parser-containerd
|
||||
|
||||
- type: json_parser
|
||||
id: parser-docker
|
||||
output: extract-metadata-from-filepath
|
||||
timestamp:
|
||||
parse_from: attributes.time
|
||||
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
||||
|
||||
- type: regex_parser
|
||||
id: parser-containerd
|
||||
regex: '^(?P<time>[^ ^Z]+Z) (?P<stream>stdout|stderr) (?P<logtag>[^ ]*) ?(?P<log>.*)$'
|
||||
output: extract-metadata-from-filepath
|
||||
timestamp:
|
||||
parse_from: attributes.time
|
||||
layout: '%Y-%m-%dT%H:%M:%S.%LZ'
|
||||
|
||||
- type: regex_parser
|
||||
id: extract-metadata-from-filepath
|
||||
regex: '^.*\/(?P<namespace>[^_]+)_(?P<pod_name>[^_]+)_(?P<uid>[a-f0-9-]+)\/(?P<container_name>[^\/]+)\/.*$'
|
||||
parse_from: attributes["log.file.path"]
|
||||
|
||||
- type: move
|
||||
from: attributes.namespace
|
||||
to: resource["k8s.namespace.name"]
|
||||
- type: move
|
||||
from: attributes.pod_name
|
||||
to: resource["k8s.pod.name"]
|
||||
- type: move
|
||||
from: attributes.container_name
|
||||
to: resource["k8s.container.name"]
|
||||
- type: move
|
||||
from: attributes.uid
|
||||
to: resource["k8s.pod.uid"]
|
||||
- type: move
|
||||
from: attributes.stream
|
||||
to: attributes["log.iostream"]
|
||||
- type: move
|
||||
from: attributes.log
|
||||
to: body
|
||||
# Loki label hints - tell Loki exporter which attributes to use as labels
|
||||
- type: add
|
||||
field: resource["loki.resource.labels"]
|
||||
value: "k8s.namespace.name, k8s.pod.name, k8s.container.name, k8s.node.name"
|
||||
- type: add
|
||||
field: attributes["loki.attribute.labels"]
|
||||
value: "log.iostream"
|
||||
|
||||
# Prometheus receiver - self metrics only
|
||||
prometheus:
|
||||
config:
|
||||
scrape_configs:
|
||||
- job_name: otel-collector
|
||||
scrape_interval: 60s
|
||||
static_configs:
|
||||
- targets: ['${env:K8S_POD_IP}:8888']
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 10s
|
||||
send_batch_size: 1024
|
||||
send_batch_max_size: 2048
|
||||
|
||||
memory_limiter:
|
||||
check_interval: 5s
|
||||
limit_mib: 400
|
||||
spike_limit_mib: 100
|
||||
|
||||
k8sattributes:
|
||||
extract:
|
||||
metadata:
|
||||
- k8s.namespace.name
|
||||
- k8s.deployment.name
|
||||
- k8s.pod.name
|
||||
- k8s.node.name
|
||||
passthrough: false
|
||||
pod_association:
|
||||
- sources:
|
||||
- from: resource_attribute
|
||||
name: k8s.pod.ip
|
||||
- sources:
|
||||
- from: resource_attribute
|
||||
name: k8s.pod.uid
|
||||
- sources:
|
||||
- from: connection
|
||||
|
||||
resourcedetection:
|
||||
detectors: [env, system]
|
||||
timeout: 5s
|
||||
override: false
|
||||
|
||||
exporters:
|
||||
otlp/tempo:
|
||||
endpoint: tempo.tempo.svc.cluster.local:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
prometheusremotewrite:
|
||||
endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
|
||||
tls:
|
||||
insecure: true
|
||||
external_labels:
|
||||
otel_collector: ${env:K8S_POD_NAME}
|
||||
|
||||
loki:
|
||||
endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push
|
||||
default_labels_enabled:
|
||||
exporter: false
|
||||
level: true
|
||||
|
||||
debug:
|
||||
verbosity: basic
|
||||
|
||||
extensions:
|
||||
health_check:
|
||||
endpoint: 0.0.0.0:13133
|
||||
|
||||
service:
|
||||
extensions: [health_check]
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, k8sattributes, resourcedetection, batch]
|
||||
exporters: [otlp/tempo]
|
||||
|
||||
metrics:
|
||||
receivers: [otlp, prometheus]
|
||||
processors: [memory_limiter, k8sattributes, resourcedetection, batch]
|
||||
exporters: [prometheusremotewrite]
|
||||
|
||||
logs:
|
||||
receivers: [otlp, filelog]
|
||||
processors: [memory_limiter, k8sattributes, resourcedetection, batch]
|
||||
exporters: [loki]
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- rbac.yaml
|
||||
- collector.yaml
|
||||
@@ -1,85 +0,0 @@
|
||||
# RBAC for OpenTelemetry Collector and Target Allocator
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: otel-collector
|
||||
namespace: opentelemetry
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: otel-collector
|
||||
rules:
|
||||
# For k8sattributes processor
|
||||
- apiGroups: [""]
|
||||
resources: ["pods", "namespaces", "nodes", "endpoints", "services"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["replicasets", "deployments", "statefulsets", "daemonsets"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: ["discovery.k8s.io"]
|
||||
resources: ["endpointslices"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
# For Target Allocator - ServiceMonitor/PodMonitor discovery
|
||||
- apiGroups: ["monitoring.coreos.com"]
|
||||
resources: ["servicemonitors", "podmonitors"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
# For node metrics
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes/metrics", "nodes/stats", "nodes/proxy"]
|
||||
verbs: ["get"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: otel-collector
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: otel-collector
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: otel-collector
|
||||
namespace: opentelemetry
|
||||
---
|
||||
# Target Allocator ServiceAccount and RBAC
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: otel-collector-targetallocator
|
||||
namespace: opentelemetry
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: otel-targetallocator
|
||||
rules:
|
||||
# Core resources for service discovery
|
||||
- apiGroups: [""]
|
||||
resources: ["pods", "nodes", "services", "endpoints", "namespaces"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: ["discovery.k8s.io"]
|
||||
resources: ["endpointslices"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
# Prometheus CRs
|
||||
- apiGroups: ["monitoring.coreos.com"]
|
||||
resources: ["servicemonitors", "podmonitors", "probes", "scrapeconfigs"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
# For allocator coordination
|
||||
- apiGroups: ["opentelemetry.io"]
|
||||
resources: ["opentelemetrycollectors"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: otel-targetallocator
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: otel-targetallocator
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: otel-collector-targetallocator
|
||||
namespace: opentelemetry
|
||||
@@ -1,44 +0,0 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: opentelemetry-operator
|
||||
namespace: argocd
|
||||
finalizers:
|
||||
- resources-finalizer.argocd.argoproj.io
|
||||
annotations:
|
||||
argocd.argoproj.io/sync-wave: "0"
|
||||
spec:
|
||||
project: default
|
||||
sources:
|
||||
- repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts
|
||||
chart: opentelemetry-operator
|
||||
targetRevision: 0.102.0
|
||||
helm:
|
||||
valueFiles:
|
||||
- $values/opentelemetry-operator/helm-values.yaml
|
||||
- repoURL: https://github0213.com/K3S-HOME/observability.git
|
||||
targetRevision: main
|
||||
ref: values
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: opentelemetry-operator
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
allowEmpty: false
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
- PrunePropagationPolicy=foreground
|
||||
- PruneLast=true
|
||||
- ServerSideApply=true
|
||||
retry:
|
||||
limit: 5
|
||||
backoff:
|
||||
duration: 5s
|
||||
factor: 2
|
||||
maxDuration: 3m
|
||||
managedNamespaceMetadata:
|
||||
labels:
|
||||
goldilocks.fairwinds.com/enabled: 'true'
|
||||
revisionHistoryLimit: 10
|
||||
@@ -1,45 +0,0 @@
|
||||
# OpenTelemetry Operator Helm Values
|
||||
# Chart: https://github.com/open-telemetry/opentelemetry-helm-charts/tree/main/charts/opentelemetry-operator
|
||||
|
||||
# Manager (Operator) configuration
|
||||
manager:
|
||||
collectorImage:
|
||||
repository: otel/opentelemetry-collector-contrib
|
||||
targetAllocatorImage:
|
||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/target-allocator
|
||||
autoInstrumentationImage:
|
||||
java:
|
||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-java
|
||||
nodejs:
|
||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-nodejs
|
||||
python:
|
||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python
|
||||
dotnet:
|
||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-dotnet
|
||||
go:
|
||||
repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-go
|
||||
resources:
|
||||
limits:
|
||||
memory: 256Mi
|
||||
requests:
|
||||
cpu: 10m
|
||||
memory: 256Mi
|
||||
# ServiceMonitor configuration
|
||||
serviceMonitor:
|
||||
enabled: false # Disable ServiceMonitor creation to prevent conflicts
|
||||
|
||||
# Admission webhooks (uses cert-manager self-signed CA)
|
||||
admissionWebhooks:
|
||||
certManager:
|
||||
enabled: true
|
||||
|
||||
# Kube RBAC Proxy
|
||||
kubeRBACProxy:
|
||||
enabled: true
|
||||
resources:
|
||||
limits:
|
||||
memory: 64Mi
|
||||
requests:
|
||||
cpu: 5m
|
||||
memory: 64Mi
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- argocd.yaml
|
||||
Reference in New Issue
Block a user