From 43cf7e9de738a3a61b91a997722032c244de96e9 Mon Sep 17 00:00:00 2001 From: Mayne0213 Date: Sun, 11 Jan 2026 21:22:39 +0900 Subject: [PATCH] REFACTOR(otel): migrate collector from Operator to Helm - Remove opentelemetry-operator (no longer needed) - Convert opentelemetry-collector to direct Helm Chart - Remove CRD-based manifests (collector.yaml, rbac.yaml) - Update helm-values.yaml with Loki labels and env vars - Simplify architecture: Helm -> DaemonSet (no Operator) --- kustomization.yaml | 1 - opentelemetry-collector/argocd.yaml | 12 +- opentelemetry-collector/helm-values.yaml | 32 ++- opentelemetry-collector/kustomization.yaml | 5 - .../manifests/collector.yaml | 233 ------------------ .../manifests/kustomization.yaml | 6 - opentelemetry-collector/manifests/rbac.yaml | 85 ------- opentelemetry-operator/argocd.yaml | 44 ---- opentelemetry-operator/helm-values.yaml | 45 ---- opentelemetry-operator/kustomization.yaml | 5 - 10 files changed, 38 insertions(+), 430 deletions(-) delete mode 100644 opentelemetry-collector/kustomization.yaml delete mode 100644 opentelemetry-collector/manifests/collector.yaml delete mode 100644 opentelemetry-collector/manifests/kustomization.yaml delete mode 100644 opentelemetry-collector/manifests/rbac.yaml delete mode 100644 opentelemetry-operator/argocd.yaml delete mode 100644 opentelemetry-operator/helm-values.yaml delete mode 100644 opentelemetry-operator/kustomization.yaml diff --git a/kustomization.yaml b/kustomization.yaml index de54b3b..e616f39 100644 --- a/kustomization.yaml +++ b/kustomization.yaml @@ -12,7 +12,6 @@ resources: - loki/argocd.yaml # promtail removed - OTel filelog receiver handles log collection - tempo/argocd.yaml - - opentelemetry-operator/argocd.yaml - opentelemetry-collector/argocd.yaml - node-exporter/argocd.yaml - kube-state-metrics/argocd.yaml diff --git a/opentelemetry-collector/argocd.yaml b/opentelemetry-collector/argocd.yaml index 2e8a680..71b4b16 100644 --- a/opentelemetry-collector/argocd.yaml +++ b/opentelemetry-collector/argocd.yaml @@ -9,10 +9,16 @@ metadata: argocd.argoproj.io/sync-wave: "1" spec: project: default - source: - repoURL: https://github0213.com/K3S-HOME/observability.git + sources: + - repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts + chart: opentelemetry-collector + targetRevision: 0.108.0 + helm: + valueFiles: + - $values/opentelemetry-collector/helm-values.yaml + - repoURL: https://github0213.com/K3S-HOME/observability.git targetRevision: main - path: opentelemetry-collector/manifests + ref: values destination: server: https://kubernetes.default.svc namespace: opentelemetry diff --git a/opentelemetry-collector/helm-values.yaml b/opentelemetry-collector/helm-values.yaml index 9303d9d..2944b65 100644 --- a/opentelemetry-collector/helm-values.yaml +++ b/opentelemetry-collector/helm-values.yaml @@ -32,6 +32,23 @@ resources: limits: memory: 512Mi +# ============================================================================= +# Environment Variables +# ============================================================================= +extraEnvs: + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: K8S_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + # ============================================================================= # Extra Volumes for Log Collection # ============================================================================= @@ -152,6 +169,13 @@ config: - type: move from: attributes.log to: body + # Loki label hints - tell Loki exporter which attributes to use as labels + - type: add + field: resource["loki.resource.labels"] + value: "k8s.namespace.name, k8s.pod.name, k8s.container.name, k8s.node.name" + - type: add + field: attributes["loki.attribute.labels"] + value: "log.iostream" # Prometheus receiver - self metrics only # Infrastructure metrics (node-exporter, kube-state-metrics) handled by Prometheus @@ -160,9 +184,9 @@ config: scrape_configs: # OTel Collector self metrics only - job_name: 'otel-collector' - scrape_interval: 30s + scrape_interval: 60s static_configs: - - targets: ['${env:MY_POD_IP}:8888'] + - targets: ['${env:K8S_POD_IP}:8888'] # --------------------------------------------------------------------------- # Processors - how data is transformed @@ -220,12 +244,14 @@ config: endpoint: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090/api/v1/write tls: insecure: true + external_labels: + otel_collector: ${env:K8S_POD_NAME} # Loki for logs loki: endpoint: http://loki.loki.svc.cluster.local:3100/loki/api/v1/push default_labels_enabled: - exporter: true + exporter: false level: true # Debug exporter (for troubleshooting) diff --git a/opentelemetry-collector/kustomization.yaml b/opentelemetry-collector/kustomization.yaml deleted file mode 100644 index 418e1ce..0000000 --- a/opentelemetry-collector/kustomization.yaml +++ /dev/null @@ -1,5 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization - -resources: - - argocd.yaml diff --git a/opentelemetry-collector/manifests/collector.yaml b/opentelemetry-collector/manifests/collector.yaml deleted file mode 100644 index b07201d..0000000 --- a/opentelemetry-collector/manifests/collector.yaml +++ /dev/null @@ -1,233 +0,0 @@ -# OpenTelemetry Collector with Target Allocator -# Managed by OpenTelemetry Operator -# -# Architecture: -# - DaemonSet mode: one collector per node for log collection -# - Target Allocator: distributes scrape targets across collectors -# - Filelog receiver for container logs -# - Prometheus receiver with Target Allocator for metrics -# - Exports to: Tempo (traces), Prometheus (metrics), Loki (logs) -apiVersion: opentelemetry.io/v1beta1 -kind: OpenTelemetryCollector -metadata: - name: otel-collector - namespace: opentelemetry -spec: - mode: daemonset - image: otel/opentelemetry-collector-contrib:0.113.0 - serviceAccount: otel-collector - - # Target Allocator disabled - metrics collected by Prometheus directly - # OTel handles logs (filelog) and traces (otlp) only - - resources: - requests: - cpu: 50m - memory: 512Mi - limits: - memory: 512Mi - - volumeMounts: - - name: varlogpods - mountPath: /var/log/pods - readOnly: true - - name: varlibdockercontainers - mountPath: /var/lib/docker/containers - readOnly: true - - volumes: - - name: varlogpods - hostPath: - path: /var/log/pods - - name: varlibdockercontainers - hostPath: - path: /var/lib/docker/containers - - ports: - - name: otlp-grpc - port: 4317 - protocol: TCP - targetPort: 4317 - - name: otlp-http - port: 4318 - protocol: TCP - targetPort: 4318 - - name: metrics - port: 8888 - protocol: TCP - targetPort: 8888 - - env: - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: K8S_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: K8S_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - config: - receivers: - otlp: - protocols: - grpc: - endpoint: 0.0.0.0:4317 - http: - endpoint: 0.0.0.0:4318 - - # Filelog receiver for container logs - filelog: - include: - - /var/log/pods/*/*/*.log - exclude: - - /var/log/pods/opentelemetry_otel-collector*/*/*.log - start_at: end - include_file_path: true - include_file_name: false - operators: - - type: router - id: get-format - routes: - - output: parser-docker - expr: 'body matches "^\\{"' - - output: parser-containerd - expr: 'body matches "^[^ Z]+Z"' - default: parser-containerd - - - type: json_parser - id: parser-docker - output: extract-metadata-from-filepath - timestamp: - parse_from: attributes.time - layout: '%Y-%m-%dT%H:%M:%S.%LZ' - - - type: regex_parser - id: parser-containerd - regex: '^(?P