Split the single otel-collector OpenTelemetryCollector into two resources:
otel-logs (daemonset mode; logs and traces pipelines) and otel-metrics
(deployment mode, 2 replicas, Target Allocator with consistent-hashing).
NOTE(review): line breaks and YAML indentation were reconstructed from a
whitespace-collapsed copy of this patch; hunk line counts match the headers,
but confirm context-line indentation against the target files before applying.

diff --git a/opentelemetry-collector/manifests/collector.yaml b/opentelemetry-collector/manifests/collector-logs.yaml
similarity index 70%
rename from opentelemetry-collector/manifests/collector.yaml
rename to opentelemetry-collector/manifests/collector-logs.yaml
index 04f7588..13eb1b4 100644
--- a/opentelemetry-collector/manifests/collector.yaml
+++ b/opentelemetry-collector/manifests/collector-logs.yaml
@@ -1,42 +1,15 @@
-# OpenTelemetry Collector with Target Allocator
-# Managed by OpenTelemetry Operator
-#
-# Architecture:
-# - DaemonSet mode: one collector per node for log collection
-# - Target Allocator (consistent-hashing): distributes scrape targets across collectors
-# - Filelog receiver for container logs
-# - Prometheus receiver with Target Allocator for metrics (replaces Prometheus scraping)
-# - Exports to: Tempo (traces), Prometheus (metrics), Loki (logs)
+# OpenTelemetry Collector for Logs and Traces
+# DaemonSet mode - runs on every node for log collection
 apiVersion: opentelemetry.io/v1beta1
 kind: OpenTelemetryCollector
 metadata:
-  name: otel-collector
+  name: otel-logs
   namespace: opentelemetry
 spec:
   mode: daemonset
   image: otel/opentelemetry-collector-contrib:0.113.0
   serviceAccount: otel-collector
 
-  # Target Allocator - distributes Prometheus scrape targets across collectors
-  # per-node strategy: each collector scrapes targets on its own node
-  targetAllocator:
-    enabled: true
-    serviceAccount: otel-collector-targetallocator
-    image: ghcr.io/open-telemetry/opentelemetry-operator/target-allocator:0.113.0
-    allocationStrategy: per-node
-    filterStrategy: relabel-config
-    prometheusCR:
-      enabled: true
-      serviceMonitorSelector: {}
-      podMonitorSelector: {}
-      scrapeInterval: 30s
-    resources:
-      requests:
-        cpu: 10m
-        memory: 64Mi
-      limits:
-        memory: 128Mi
-
   resources:
     requests:
       cpu: 50m
@@ -107,7 +80,7 @@ spec:
         include:
           - /var/log/pods/*/*/*.log
         exclude:
-          - /var/log/pods/opentelemetry_otel-collector*/*/*.log
+          - /var/log/pods/opentelemetry_otel-*/*/*.log
         start_at: end
         include_file_path: true
         include_file_name: false
@@ -160,21 +133,6 @@ spec:
             from: attributes.log
             to: body
 
-      # Prometheus receiver - uses Target Allocator for ServiceMonitor/PodMonitor discovery
-      prometheus:
-        config:
-          global:
-            scrape_interval: 60s
-          scrape_configs:
-            - job_name: otel-collector
-              scrape_interval: 60s
-              static_configs:
-                - targets: ['${env:K8S_POD_IP}:8888']
-        target_allocator:
-          endpoint: http://otel-collector-targetallocator:80
-          interval: 30s
-          collector_id: ${env:K8S_POD_NAME}
-
     processors:
       batch:
         timeout: 10s
@@ -215,13 +173,6 @@ spec:
         tls:
           insecure: true
 
-      prometheusremotewrite:
-        endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
-        tls:
-          insecure: true
-        external_labels:
-          otel_collector: ${env:K8S_POD_NAME}
-
       loki:
         endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push
         default_labels_enabled:
@@ -243,11 +194,6 @@ spec:
           processors: [memory_limiter, k8sattributes, resourcedetection, batch]
           exporters: [otlp/tempo]
 
-        metrics:
-          receivers: [otlp, prometheus]
-          processors: [memory_limiter, k8sattributes, resourcedetection, batch]
-          exporters: [prometheusremotewrite]
-
         logs:
           receivers: [otlp, filelog]
           processors: [memory_limiter, k8sattributes, resourcedetection, batch]
diff --git a/opentelemetry-collector/manifests/collector-metrics.yaml b/opentelemetry-collector/manifests/collector-metrics.yaml
new file mode 100644
index 0000000..e8b2954
--- /dev/null
+++ b/opentelemetry-collector/manifests/collector-metrics.yaml
@@ -0,0 +1,147 @@
+# OpenTelemetry Collector for Metrics
+# Deployment mode with Target Allocator (consistent-hashing)
+apiVersion: opentelemetry.io/v1beta1
+kind: OpenTelemetryCollector
+metadata:
+  name: otel-metrics
+  namespace: opentelemetry
+spec:
+  mode: deployment
+  replicas: 2
+  image: otel/opentelemetry-collector-contrib:0.113.0
+  serviceAccount: otel-collector
+
+  # Target Allocator - distributes scrape targets across collector replicas
+  targetAllocator:
+    enabled: true
+    serviceAccount: otel-collector-targetallocator
+    image: ghcr.io/open-telemetry/opentelemetry-operator/target-allocator:0.113.0
+    allocationStrategy: consistent-hashing
+    filterStrategy: relabel-config
+    prometheusCR:
+      enabled: true
+      serviceMonitorSelector: {}
+      podMonitorSelector: {}
+      scrapeInterval: 30s
+    resources:
+      requests:
+        cpu: 10m
+        memory: 64Mi
+      limits:
+        memory: 128Mi
+
+  resources:
+    requests:
+      cpu: 50m
+      memory: 256Mi
+    limits:
+      memory: 512Mi
+
+  ports:
+    - name: otlp-grpc
+      port: 4317
+      protocol: TCP
+      targetPort: 4317
+    - name: otlp-http
+      port: 4318
+      protocol: TCP
+      targetPort: 4318
+    - name: metrics
+      port: 8888
+      protocol: TCP
+      targetPort: 8888
+
+  env:
+    - name: K8S_NODE_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: spec.nodeName
+    - name: K8S_POD_NAME
+      valueFrom:
+        fieldRef:
+          fieldPath: metadata.name
+    - name: K8S_POD_IP
+      valueFrom:
+        fieldRef:
+          fieldPath: status.podIP
+
+  config:
+    receivers:
+      otlp:
+        protocols:
+          grpc:
+            endpoint: 0.0.0.0:4317
+          http:
+            endpoint: 0.0.0.0:4318
+
+      # Prometheus receiver with Target Allocator
+      prometheus:
+        config:
+          global:
+            scrape_interval: 60s
+          scrape_configs:
+            - job_name: otel-metrics-self
+              scrape_interval: 60s
+              static_configs:
+                - targets: ['${env:K8S_POD_IP}:8888']
+        target_allocator:
+          endpoint: http://otel-metrics-targetallocator:80
+          interval: 30s
+          collector_id: ${env:K8S_POD_NAME}
+
+    processors:
+      batch:
+        timeout: 10s
+        send_batch_size: 1024
+        send_batch_max_size: 2048
+
+      memory_limiter:
+        check_interval: 5s
+        limit_mib: 400
+        spike_limit_mib: 100
+
+      k8sattributes:
+        extract:
+          metadata:
+            - k8s.namespace.name
+            - k8s.deployment.name
+            - k8s.pod.name
+            - k8s.node.name
+        passthrough: false
+        pod_association:
+          - sources:
+              - from: resource_attribute
+                name: k8s.pod.ip
+          - sources:
+              - from: resource_attribute
+                name: k8s.pod.uid
+          - sources:
+              - from: connection
+
+      resourcedetection:
+        detectors: [env, system]
+        timeout: 5s
+        override: false
+
+    exporters:
+      prometheusremotewrite:
+        endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write
+        tls:
+          insecure: true
+        external_labels:
+          otel_collector: ${env:K8S_POD_NAME}
+
+      debug:
+        verbosity: basic
+
+    extensions:
+      health_check:
+        endpoint: 0.0.0.0:13133
+
+    service:
+      extensions: [health_check]
+      pipelines:
+        metrics:
+          receivers: [otlp, prometheus]
+          processors: [memory_limiter, k8sattributes, resourcedetection, batch]
+          exporters: [prometheusremotewrite]
diff --git a/opentelemetry-collector/manifests/kustomization.yaml b/opentelemetry-collector/manifests/kustomization.yaml
index 73fb16e..e0cc2c4 100644
--- a/opentelemetry-collector/manifests/kustomization.yaml
+++ b/opentelemetry-collector/manifests/kustomization.yaml
@@ -3,4 +3,5 @@ kind: Kustomization
 
 resources:
   - rbac.yaml
-  - collector.yaml
+  - collector-logs.yaml
+  - collector-metrics.yaml