From 85330920ac3ce8ad446590c531d706fb4031b80e Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Wed, 6 May 2026 14:55:45 +0100 Subject: [PATCH] feat(telemetry): add Loki service and filelog receiver for Phase 8 log ingestion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cherry-pick Loki infrastructure from phase-10 back to where it belongs (Phase 8, Tasks 8.2/8.3): - Add Loki 3.4.2 service to docker-compose.yml (port 3100) - Add filelog receiver to OTel Collector config (tails debug.log, regex_parser extracts trace_id/span_id/partition/severity) - Add otlphttp/loki exporter (uses Loki 3.x native OTLP ingestion) - Add logs pipeline: filelog -> batch -> otlphttp/loki - Add health_check extension - Mount xrpld log directory into collector container - Add prometheus-data and loki-data persistent volumes StatsD receiver intentionally excluded — Phase 7 migrated to native OTLP metrics, making the StatsD receiver unnecessary. Co-Authored-By: Claude Opus 4.7 (1M context) --- docker/telemetry/docker-compose.yml | 34 +++++++++++++++-- docker/telemetry/otel-collector-config.yaml | 41 +++++++++++++++++++++ 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/docker/telemetry/docker-compose.yml b/docker/telemetry/docker-compose.yml index b2db1be183..4fa3292888 100644 --- a/docker/telemetry/docker-compose.yml +++ b/docker/telemetry/docker-compose.yml @@ -2,12 +2,15 @@ # # Provides services for local development: # - otel-collector: receives OTLP traces from xrpld, batches and -# forwards them to Tempo. Listens on ports 4317 (gRPC) -# and 4318 (HTTP). +# forwards them to Tempo. Also tails xrpld log files +# via filelog receiver and exports to Loki. Listens on ports +# 4317 (gRPC) and 4318 (HTTP). # - tempo: Grafana Tempo tracing backend, queryable via Grafana Explore # on port 3000. Recommended for production (S3/GCS storage, TraceQL). 
-# - grafana: dashboards on port 3000, pre-configured with Tempo -# and Prometheus datasources. +# - loki: Grafana Loki log aggregation backend for centralized log +# ingestion and log-trace correlation (Phase 8). +# - grafana: dashboards on port 3000, pre-configured with Tempo, +# Prometheus, and Loki datasources. # # Usage: # docker compose -f docker/telemetry/docker-compose.yml up -d @@ -33,8 +36,13 @@ services: volumes: # Mount collector pipeline config (receivers → processors → exporters) - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro + # Phase 8: Mount rippled log directory for filelog receiver. + # The integration test writes logs to /tmp/xrpld-integration/; + # mount it read-only so the collector can tail debug.log files. + - /tmp/xrpld-integration:/var/log/rippled:ro depends_on: - tempo + - loki networks: - xrpld-telemetry @@ -53,12 +61,27 @@ services: networks: - xrpld-telemetry + # Phase 8: Grafana Loki for centralized log ingestion and log-trace + # correlation. Loki 3.x supports native OTLP ingestion, so the OTel + # Collector exports via otlphttp to Loki's /otlp endpoint. + # Query logs via Grafana Explore -> Loki at http://localhost:3000. + loki: + image: grafana/loki:3.4.2 + ports: + - "3100:3100" + command: -config.file=/etc/loki/local-config.yaml + volumes: + - loki-data:/loki + networks: + - xrpld-telemetry + prometheus: image: prom/prometheus:latest ports: - "9090:9090" volumes: - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus depends_on: - otel-collector networks: @@ -80,6 +103,7 @@ services: depends_on: - tempo - prometheus + - loki networks: - xrpld-telemetry @@ -88,6 +112,8 @@ services: # docker compose -f docker/telemetry/docker-compose.yml down -v volumes: tempo-data: + prometheus-data: + loki-data: # Isolated bridge network so services communicate by container name # (e.g., the collector reaches Tempo at http://tempo:4317). 
diff --git a/docker/telemetry/otel-collector-config.yaml b/docker/telemetry/otel-collector-config.yaml index c600b409df..fc13b2442d 100644 --- a/docker/telemetry/otel-collector-config.yaml +++ b/docker/telemetry/otel-collector-config.yaml @@ -3,10 +3,26 @@ # Pipelines: # traces: OTLP receiver -> batch processor -> debug + Tempo + spanmetrics # metrics: OTLP receiver + spanmetrics connector -> Prometheus exporter +# logs: filelog receiver -> batch processor -> otlphttp/Loki (Phase 8) # # xrpld sends traces via OTLP/HTTP to port 4318. The collector batches # them, forwards to Tempo, and derives RED metrics via the spanmetrics # connector, which Prometheus scrapes on port 8889. +# +# xrpld sends beast::insight metrics natively via OTLP/HTTP to port 4318 +# (same endpoint as traces). The OTLP receiver feeds both the traces and +# metrics pipelines. Metrics are exported to Prometheus alongside +# span-derived metrics. +# +# Phase 8: The filelog receiver tails xrpld's debug.log files under +# /var/log/rippled/ (mounted from the host). A regex_parser operator +# extracts timestamp, partition, severity, and optional trace_id/span_id +# fields injected by Logs::format(). Parsed logs are exported to Grafana +# Loki for log-trace correlation. + +extensions: + health_check: + endpoint: 0.0.0.0:13133 receivers: otlp: @@ -15,6 +31,18 @@ receivers: endpoint: 0.0.0.0:4317 http: endpoint: 0.0.0.0:4318 + # Phase 8: Filelog receiver tails xrpld debug.log files for log-trace + # correlation. Extracts structured fields (timestamp, partition, severity, + # trace_id, span_id, message) via regex. The trace_id and span_id are + # optional — only present when the log was emitted within an active span. 
+ filelog: + include: [/var/log/rippled/*/debug.log] + operators: + - type: regex_parser + regex: '^(?P<timestamp>\S+)\s+(?P<partition>\S+):(?P<severity>\S+)\s+(?:trace_id=(?P<trace_id>[a-f0-9]+)\s+span_id=(?P<span_id>[a-f0-9]+)\s+)?(?P<message>.*)$' + timestamp: + parse_from: attributes.timestamp + layout: "%Y-%b-%d %H:%M:%S.%f" processors: batch: @@ -45,10 +73,16 @@ exporters: endpoint: tempo:4317 tls: insecure: true + # Phase 8: Export logs to Grafana Loki via OTLP/HTTP. Loki 3.x supports + # native OTLP ingestion on its /otlp endpoint, replacing the removed + # loki exporter (dropped in otel-collector-contrib v0.147.0). + otlphttp/loki: + endpoint: http://loki:3100/otlp prometheus: endpoint: 0.0.0.0:8889 service: + extensions: [health_check] pipelines: traces: receivers: [otlp] @@ -56,4 +90,11 @@ service: exporters: [debug, otlp/tempo, spanmetrics] metrics: receivers: [otlp, spanmetrics] + processors: [batch] exporters: [prometheus] + # Phase 8: Log pipeline ingests xrpld debug.log via filelog receiver, + # batches entries, and exports to Loki for log-trace correlation. + logs: + receivers: [filelog] + processors: [batch] + exporters: [otlphttp/loki]