Files
observability/opentelemetry-collector/manifests/collector.yaml
Mayne0213 9e87e6fbcb REVERT(otel): remove metrics collection, keep logs/traces only
- Revert to simpler architecture where Prometheus scrapes metrics directly via ServiceMonitors
- OTel Collector only handles logs (filelog) and traces (otlp)
- Remove Target Allocator and metrics-related config
- This reduces complexity and resource usage for home cluster
2026-01-10 01:18:35 +09:00

232 lines
6.1 KiB
YAML

# OpenTelemetry Collector (logs and traces, plus collector self-metrics)
# Managed by OpenTelemetry Operator
#
# Architecture:
# - DaemonSet mode: one collector per node for log collection
# - No Target Allocator: Prometheus scrapes application metrics directly
#   via ServiceMonitors
# - OTLP receiver (gRPC 4317 / HTTP 4318)
# - Filelog receiver for container logs
# - Prometheus receiver scrapes only the collector's own metrics endpoint
# - Exports to: Tempo (traces), Prometheus remote write (metrics), Loki (logs)
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
metadata:
  name: otel-collector
  namespace: opentelemetry
spec:
  # One collector pod per node; the filelog receiver reads that node's pod
  # logs through the hostPath mounts declared below.
  mode: daemonset
  image: otel/opentelemetry-collector-contrib:0.113.0
  serviceAccount: otel-collector
  # Target Allocator disabled - application metrics are collected by
  # Prometheus directly. The metrics pipeline below only carries OTLP metrics
  # pushed to this collector and the collector's own self-scrape.
  resources:
    requests:
      cpu: 50m
      memory: 256Mi
    limits:
      # Container ceiling; processors.memory_limiter (400 MiB) sits below it.
      memory: 512Mi
  tolerations:
    # Allow scheduling onto control-plane nodes as well.
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  # NOTE(review): no securityContext is set here. Reading /var/log/pods from
  # the host usually needs elevated file permissions - confirm the image's
  # default user can read these files on your nodes.
  volumeMounts:
    - name: varlogpods
      mountPath: /var/log/pods
      readOnly: true
    - name: varlibdockercontainers
      mountPath: /var/lib/docker/containers
      readOnly: true
  volumes:
    - name: varlogpods
      hostPath:
        path: /var/log/pods
    - name: varlibdockercontainers
      hostPath:
        path: /var/lib/docker/containers
  ports:
    - name: otlp-grpc
      port: 4317
      protocol: TCP
      targetPort: 4317
    - name: otlp-http
      port: 4318
      protocol: TCP
      targetPort: 4318
    # Collector self-metrics; scraped by receivers.prometheus below.
    - name: metrics
      port: 8888
      protocol: TCP
      targetPort: 8888
  env:
    # NOTE(review): K8S_NODE_NAME is not referenced anywhere in this config.
    # It is the usual input for k8sattributes `filter.node_from_env_var` -
    # confirm whether that filter was intended before adding or removing it.
    - name: K8S_NODE_NAME
      valueFrom:
        fieldRef:
          fieldPath: spec.nodeName
    # Used as the `otel_collector` external label on remote-written metrics.
    - name: K8S_POD_NAME
      valueFrom:
        fieldRef:
          fieldPath: metadata.name
    # Used as the self-scrape target address.
    - name: K8S_POD_IP
      valueFrom:
        fieldRef:
          fieldPath: status.podIP
  config:
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318
      # Filelog receiver for container logs
      filelog:
        include:
          - /var/log/pods/*/*/*.log
        exclude:
          # Do not ingest this collector's own pod logs.
          - /var/log/pods/opentelemetry_otel-collector*/*/*.log
        start_at: end
        # The container operator derives the k8s.* metadata from log.file.path.
        include_file_path: true
        include_file_name: false
        operators:
          # Replaces the hand-rolled router / json_parser / regex_parser / move
          # chain. The container operator detects the docker, containerd and
          # CRI-O line formats, parses the timestamp, recombines partial CRI
          # lines (logtag "P") into one record, moves the message to the body,
          # and sets k8s.namespace.name, k8s.pod.name, k8s.pod.uid and
          # k8s.container.name as resource attributes plus log.iostream as a
          # log attribute. The previous chain emitted every partial line as a
          # separate record.
          - type: container
            id: container-parser
      # Prometheus receiver - self metrics only
      prometheus:
        config:
          scrape_configs:
            - job_name: otel-collector
              scrape_interval: 60s
              static_configs:
                - targets:
                    - '${env:K8S_POD_IP}:8888'
    processors:
      batch:
        timeout: 10s
        send_batch_size: 1024
        send_batch_max_size: 2048
      # Listed first in every pipeline so back-pressure is applied before any
      # other processing.
      memory_limiter:
        check_interval: 5s
        limit_mib: 400
        spike_limit_mib: 100
      k8sattributes:
        extract:
          metadata:
            - k8s.namespace.name
            - k8s.deployment.name
            - k8s.pod.name
            - k8s.node.name
        passthrough: false
        # Association rules are tried in order: pod IP attribute, then pod UID
        # (set on file logs by the filelog receiver), then the connection's
        # source address.
        pod_association:
          - sources:
              - from: resource_attribute
                name: k8s.pod.ip
          - sources:
              - from: resource_attribute
                name: k8s.pod.uid
          - sources:
              - from: connection
      resourcedetection:
        detectors: [env, system]
        timeout: 5s
        # Keep attributes already set upstream (e.g. by k8sattributes).
        override: false
    exporters:
      otlp/tempo:
        endpoint: tempo.tempo.svc.cluster.local:4317
        tls:
          insecure: true
      prometheusremotewrite:
        endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
        tls:
          insecure: true
        external_labels:
          otel_collector: '${env:K8S_POD_NAME}'
      # NOTE(review): the contrib `loki` exporter is, as far as I know, marked
      # deprecated upstream in favour of Loki's native OTLP endpoint - confirm
      # before bumping the collector image.
      loki:
        endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push
        default_labels_enabled:
          exporter: true
          level: true
      # Defined but not wired into any pipeline below.
      debug:
        verbosity: basic
    extensions:
      health_check:
        endpoint: 0.0.0.0:13133
    service:
      extensions: [health_check]
      pipelines:
        traces:
          receivers: [otlp]
          processors: [memory_limiter, k8sattributes, resourcedetection, batch]
          exporters: [otlp/tempo]
        metrics:
          receivers: [otlp, prometheus]
          processors: [memory_limiter, k8sattributes, resourcedetection, batch]
          exporters: [prometheusremotewrite]
        logs:
          receivers: [otlp, filelog]
          processors: [memory_limiter, k8sattributes, resourcedetection, batch]
          exporters: [loki]