diff --git a/kustomization.yaml b/kustomization.yaml index 9797788..de54b3b 100644 --- a/kustomization.yaml +++ b/kustomization.yaml @@ -10,8 +10,9 @@ resources: - alertmanager/argocd.yaml - grafana/argocd.yaml - loki/argocd.yaml - - promtail/argocd.yaml + # promtail removed - OTel filelog receiver handles log collection - tempo/argocd.yaml + - opentelemetry-operator/argocd.yaml - opentelemetry-collector/argocd.yaml - node-exporter/argocd.yaml - kube-state-metrics/argocd.yaml diff --git a/opentelemetry-collector/argocd.yaml b/opentelemetry-collector/argocd.yaml index bc08499..5fa0ca6 100644 --- a/opentelemetry-collector/argocd.yaml +++ b/opentelemetry-collector/argocd.yaml @@ -5,18 +5,14 @@ metadata: namespace: argocd finalizers: - resources-finalizer.argocd.argoproj.io + annotations: + argocd.argoproj.io/sync-wave: "1" spec: project: default - sources: - - repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts - chart: opentelemetry-collector - targetRevision: 0.108.0 - helm: - valueFiles: - - $values/opentelemetry-collector/helm-values.yaml - - repoURL: https://github.com/K3S-HOME/observability.git + source: + repoURL: https://github.com/K3S-HOME/observability.git targetRevision: main - ref: values + path: opentelemetry-collector/manifests destination: server: https://kubernetes.default.svc namespace: opentelemetry @@ -29,6 +25,7 @@ spec: - CreateNamespace=true - PrunePropagationPolicy=foreground - PruneLast=true + - ServerSideApply=true retry: limit: 5 backoff: diff --git a/opentelemetry-collector/helm-values.yaml b/opentelemetry-collector/helm-values.yaml index 76b52cf..39c7761 100644 --- a/opentelemetry-collector/helm-values.yaml +++ b/opentelemetry-collector/helm-values.yaml @@ -4,6 +4,8 @@ # Architecture: # - DaemonSet mode: one collector per node for efficient data collection # - OTLP receiver for traces, metrics, and logs +# - Filelog receiver for container logs (replaces Promtail) +# - Prometheus receiver for metrics scraping (replaces Prometheus scrape) # - Exports to: Tempo (traces), Prometheus (metrics), Loki (logs) # # Pipeline: @@ -21,14 +23,14 @@ image: mode: daemonset # ============================================================================= -# Resource Limits (optimized for small cluster) +# Resource Limits (increased for log + metrics collection) # ============================================================================= resources: requests: - cpu: 25m - memory: 64Mi + cpu: 50m + memory: 256Mi limits: - memory: 64Mi + memory: 512Mi # ============================================================================= # Tolerations (run on all nodes including master) @@ -38,6 +40,25 @@ tolerations: operator: Exists effect: NoSchedule +# ============================================================================= +# Extra Volumes for Log Collection +# ============================================================================= +extraVolumes: + - name: varlogpods + hostPath: + path: /var/log/pods + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + +extraVolumeMounts: + - name: varlogpods + mountPath: /var/log/pods + readOnly: true + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers + readOnly: true + # ============================================================================= # Ports # ============================================================================= @@ -64,8 +85,11 @@ ports: # OpenTelemetry Collector Configuration # ============================================================================= config: + # --------------------------------------------------------------------------- # Receivers - what data the collector accepts + # --------------------------------------------------------------------------- receivers: + # OTLP receiver for application telemetry otlp: protocols: grpc: @@ -73,7 +97,84 @@ config: http: endpoint: 0.0.0.0:4318 + # Filelog receiver for container logs (replaces Promtail) + filelog: + include: + - /var/log/pods/*/*/*.log + exclude: + # Exclude collector's own logs to prevent feedback loop + - /var/log/pods/opentelemetry_opentelemetry-collector*/*/*.log + start_at: end + include_file_path: true + include_file_name: false + operators: + # Route based on log format + - type: router + id: get-format + routes: + - output: parser-docker + expr: 'body matches "^\\{"' + - output: parser-containerd + expr: 'body matches "^[^ Z]+Z"' + default: parser-containerd + + # Docker JSON format parser + - type: json_parser + id: parser-docker + output: extract-metadata-from-filepath + timestamp: + parse_from: attributes.time + layout: '%Y-%m-%dT%H:%M:%S.%LZ' + + # Containerd/CRI format parser + - type: regex_parser + id: parser-containerd + regex: '^(?P