feat(telemetry): add Loki service and filelog receiver for Phase 8 log ingestion

Cherry-pick Loki infrastructure from phase-10 back to where it belongs
(Phase 8, Tasks 8.2/8.3):

- Add Loki 3.4.2 service to docker-compose.yml (port 3100)
- Add filelog receiver to OTel Collector config (tails debug.log,
  regex_parser extracts trace_id/span_id/partition/severity)
- Add otlphttp/loki exporter (uses Loki 3.x native OTLP ingestion)
- Add logs pipeline: filelog -> batch -> otlphttp/loki
- Add health_check extension
- Mount xrpld log directory into collector container
- Add prometheus-data and loki-data persistent volumes

StatsD receiver intentionally excluded — Phase 7 migrated to native
OTLP metrics, making the StatsD receiver unnecessary.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Pratik Mankawde
2026-05-06 14:55:45 +01:00
parent fac6c3ac1d
commit 85330920ac
2 changed files with 71 additions and 4 deletions

View File

@@ -2,12 +2,15 @@
#
# Provides services for local development:
# - otel-collector: receives OTLP traces from xrpld, batches and
# forwards them to Tempo. Listens on ports 4317 (gRPC)
# and 4318 (HTTP).
# forwards them to Tempo. Also tails xrpld log files
# via filelog receiver and exports to Loki. Listens on ports
# 4317 (gRPC) and 4318 (HTTP).
# - tempo: Grafana Tempo tracing backend, queryable via Grafana Explore
# on port 3000. Recommended for production (S3/GCS storage, TraceQL).
# - grafana: dashboards on port 3000, pre-configured with Tempo
# and Prometheus datasources.
# - loki: Grafana Loki log aggregation backend for centralized log
# ingestion and log-trace correlation (Phase 8).
# - grafana: dashboards on port 3000, pre-configured with Tempo,
# Prometheus, and Loki datasources.
#
# Usage:
# docker compose -f docker/telemetry/docker-compose.yml up -d
@@ -33,8 +36,13 @@ services:
volumes:
# Mount collector pipeline config (receivers → processors → exporters)
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
# Phase 8: Mount the xrpld log directory for the filelog receiver.
# The integration test writes logs to /tmp/xrpld-integration/ on the host;
# mount it read-only so the collector can tail the debug.log files.
- /tmp/xrpld-integration:/var/log/rippled:ro
depends_on:
- tempo
- loki
networks:
- xrpld-telemetry
@@ -53,12 +61,27 @@ services:
networks:
- xrpld-telemetry
# Phase 8: Grafana Loki for centralized log ingestion and log-trace
# correlation. Loki 3.x supports native OTLP ingestion, so the OTel
# Collector exports via otlphttp to Loki's /otlp endpoint.
# Query logs via Grafana Explore -> Loki at http://localhost:3000.
# Loki service definition. The OTel Collector's otlphttp/loki exporter
# reaches it by service name at http://loki:3100/otlp.
loki:
# Image tag is pinned to an explicit version (3.4.2).
image: grafana/loki:3.4.2
ports:
# Publish Loki's HTTP port 3100 on the host under the same number.
- "3100:3100"
# NOTE(review): no config file is mounted by this service, so this path
# presumably refers to a config shipped inside the image — confirm.
command: -config.file=/etc/loki/local-config.yaml
volumes:
# Named volume (declared under the top-level volumes key) mounted at /loki.
- loki-data:/loki
networks:
- xrpld-telemetry
prometheus:
image: prom/prometheus:latest
ports:
- "9090:9090"
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheus-data:/prometheus
depends_on:
- otel-collector
networks:
@@ -80,6 +103,7 @@ services:
depends_on:
- tempo
- prometheus
- loki
networks:
- xrpld-telemetry
@@ -88,6 +112,8 @@ services:
# docker compose -f docker/telemetry/docker-compose.yml down -v
# Named volumes used by the services in this file. Each entry has no
# options, so Compose creates it with default settings.
volumes:
tempo-data:
# Mounted at /prometheus by the prometheus service.
prometheus-data:
# Mounted at /loki by the loki service.
loki-data:
# Isolated bridge network so services communicate by container name
# (e.g., the collector reaches Tempo at http://tempo:4317).

View File

@@ -3,10 +3,26 @@
# Pipelines:
# traces: OTLP receiver -> batch processor -> debug + Tempo + spanmetrics
# metrics: OTLP receiver + spanmetrics connector -> Prometheus exporter
# logs: filelog receiver -> batch processor -> otlphttp/Loki (Phase 8)
#
# xrpld sends traces via OTLP/HTTP to port 4318. The collector batches
# them, forwards to Tempo, and derives RED metrics via the spanmetrics
# connector, which Prometheus scrapes on port 8889.
#
# xrpld sends beast::insight metrics natively via OTLP/HTTP to port 4318
# (same endpoint as traces). The OTLP receiver feeds both the traces and
# metrics pipelines. Metrics are exported to Prometheus alongside
# span-derived metrics.
#
# Phase 8: The filelog receiver tails xrpld's debug.log files under
# /var/log/rippled/ (mounted from the host). A regex_parser operator
# extracts timestamp, partition, severity, and optional trace_id/span_id
# fields injected by Logs::format(). Parsed logs are exported to Grafana
# Loki for log-trace correlation.
# Collector extensions. An extension only takes effect once it is listed
# under service.extensions, which this config does for health_check.
extensions:
health_check:
# Listens on all interfaces, port 13133.
# NOTE(review): reaching this from the host requires the port to be
# published in docker-compose — not visible here, confirm.
endpoint: 0.0.0.0:13133
receivers:
otlp:
@@ -15,6 +31,18 @@ receivers:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
# Phase 8: Filelog receiver tails xrpld debug.log files for log-trace
# correlation. A regex_parser operator splits each line into timestamp,
# partition, severity, optional trace_id/span_id, and message. The trace_id
# and span_id are optional — only present when the log was emitted within
# an active span.
#
# The timestamp capture spans two whitespace-separated tokens (date and
# time-of-day) so that it can satisfy the "%Y-%b-%d %H:%M:%S.%f" layout
# below, which contains a space. A single \S+ token can never match that
# layout and lets the time-of-day leak into the partition/severity groups.
# NOTE(review): the optional "UTC" token after the time assumes xrpld writes
# a timezone suffix there — confirm against Logs::format() output.
# NOTE(review): confirm that the fractional-second precision xrpld writes
# is accepted by the %f directive.
filelog:
  include: [/var/log/rippled/*/debug.log]
  operators:
    - type: regex_parser
      regex: '^(?P<timestamp>\S+\s+\S+)(?:\s+UTC)?\s+(?P<partition>\S+):(?P<severity>\S+)\s+(?:trace_id=(?P<trace_id>[a-f0-9]+)\s+span_id=(?P<span_id>[a-f0-9]+)\s+)?(?P<message>.*)$'
      timestamp:
        parse_from: attributes.timestamp
        layout: "%Y-%b-%d %H:%M:%S.%f"
processors:
batch:
@@ -45,10 +73,16 @@ exporters:
endpoint: tempo:4317
tls:
insecure: true
# Phase 8: Export logs to Grafana Loki via OTLP/HTTP. Loki 3.x supports
# native OTLP ingestion on its /otlp endpoint, replacing the removed
# loki exporter (dropped in otel-collector-contrib v0.147.0).
# The endpoint uses plain HTTP and addresses Loki by its compose service
# name on port 3100.
# NOTE(review): the otlphttp exporter is expected to append the per-signal
# path (e.g. /v1/logs) to this base URL — confirm against exporter docs.
otlphttp/loki:
endpoint: http://loki:3100/otlp
# Prometheus exporter: serves collector metrics on all interfaces, port
# 8889, which is the port Prometheus scrapes per the header comment.
prometheus:
endpoint: 0.0.0.0:8889
service:
extensions: [health_check]
pipelines:
traces:
receivers: [otlp]
@@ -56,4 +90,11 @@ service:
exporters: [debug, otlp/tempo, spanmetrics]
metrics:
receivers: [otlp, spanmetrics]
processors: [batch]
exporters: [prometheus]
# Phase 8: Log pipeline ingests xrpld debug.log via filelog receiver,
# batches entries, and exports to Loki for log-trace correlation.
logs:
# Only the filelog receiver feeds this pipeline; the otlp receiver is not
# listed, so logs arriving over OTLP are not routed here.
receivers: [filelog]
# Uses the batch processor, as the metrics pipeline does.
processors: [batch]
# Single destination: the otlphttp/loki exporter defined under exporters.
exporters: [otlphttp/loki]