FEAT(otel): enable Target Allocator for metrics

- Enable Target Allocator with consistent-hashing strategy
- Configure prometheus receiver to use Target Allocator
- Add RBAC permissions for secrets, configmaps, and events
- Use prometheusCR for ServiceMonitor/PodMonitor discovery
This commit is contained in:
2026-01-09 23:30:41 +09:00
parent 02faf93555
commit 1fdbb5e1dd
2 changed files with 36 additions and 10 deletions

View File

@@ -3,9 +3,9 @@
#
# Architecture:
# - DaemonSet mode: one collector per node for log collection
# - Target Allocator: distributes scrape targets across collectors
# - Target Allocator (consistent-hashing): distributes scrape targets across collectors
# - Filelog receiver for container logs
# - Prometheus receiver with Target Allocator for metrics
# - Prometheus receiver with Target Allocator for metrics (replaces direct scraping by Prometheus)
# - Exports to: Tempo (traces), Prometheus (metrics), Loki (logs)
apiVersion: opentelemetry.io/v1beta1
kind: OpenTelemetryCollector
@@ -17,8 +17,25 @@ spec:
image: otel/opentelemetry-collector-contrib:0.113.0
serviceAccount: otel-collector
# Target Allocator disabled - metrics collected by Prometheus directly
# OTel handles logs (filelog) and traces (otlp) only
# Target Allocator - distributes Prometheus scrape targets across collectors
# Uses the consistent-hashing strategy instead of per-node because of a collector-to-node mapping bug
targetAllocator:
enabled: true
serviceAccount: otel-collector-targetallocator
image: ghcr.io/open-telemetry/opentelemetry-operator/target-allocator:0.113.0
allocationStrategy: consistent-hashing
filterStrategy: relabel-config
prometheusCR:
enabled: true
serviceMonitorSelector: {}
podMonitorSelector: {}
scrapeInterval: 30s
resources:
requests:
cpu: 10m
memory: 64Mi
limits:
memory: 128Mi
resources:
requests:
@@ -143,14 +160,15 @@ spec:
from: attributes.log
to: body
# Prometheus receiver - self metrics only
# Prometheus receiver - uses Target Allocator for ServiceMonitor/PodMonitor discovery
prometheus:
config:
scrape_configs:
- job_name: otel-collector
global:
scrape_interval: 60s
static_configs:
- targets: ['${env:K8S_POD_IP}:8888']
target_allocator:
endpoint: http://otel-collector-targetallocator:80
interval: 30s
collector_id: ${env:K8S_POD_NAME}
processors:
batch:

View File

@@ -59,6 +59,14 @@ rules:
- apiGroups: [""]
resources: ["pods", "nodes", "services", "endpoints", "namespaces"]
verbs: ["get", "watch", "list"]
# Secrets and ConfigMaps for TLS certificates referenced by ServiceMonitors
- apiGroups: [""]
resources: ["secrets", "configmaps"]
verbs: ["get", "watch", "list"]
# Events for status reporting
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "patch"]
- apiGroups: ["discovery.k8s.io"]
resources: ["endpointslices"]
verbs: ["get", "watch", "list"]