mirror of
https://github.com/XRPLF/rippled.git
synced 2026-04-29 15:37:57 +00:00
Integrate the existing StatsD metrics pipeline (beast::insight) into the OpenTelemetry observability stack and add new trace spans for ledger build/store/validate and peer proposal/validation receive. Phase 5b — Ledger, peer, and transaction spans: - Add ledger.build span with close time attributes in BuildLedger.cpp - Add tx.apply span with tx_count/tx_failed in BuildLedger.cpp - Add ledger.store and ledger.validate spans in LedgerMaster.cpp - Add peer.proposal.receive span with trusted attribute in PeerImp.cpp - Add peer.validation.receive span with ledger_hash, full, trusted attributes in PeerImp.cpp - Add ledger-operations and peer-network Grafana dashboards Phase 6 — StatsD metrics integration: - Add StatsD UDP receiver (port 8125) to OTel Collector - Add 5 StatsD Grafana dashboards: node health, network traffic, overlay traffic detail, ledger data sync, RPC pathfinding - Add 09-data-collection-reference.md cataloging all metrics/spans - Update existing dashboards with new span panels - Expand telemetry runbook and integration test script - Add codecov exclusions for telemetry modules Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
96 lines
3.2 KiB
YAML
96 lines
3.2 KiB
YAML
# Docker Compose stack for xrpld OpenTelemetry observability.
#
# Provides services for local development:
#   - otel-collector: receives OTLP traces from xrpld, batches and
#     forwards them to Tempo. Listens on ports 4317 (gRPC)
#     and 4318 (HTTP); also accepts StatsD metrics on 8125/udp and
#     exposes Prometheus metrics on 8889.
#   - tempo: Grafana Tempo tracing backend, queryable via Grafana Explore
#     on port 3000. Recommended for production (S3/GCS storage, TraceQL).
#   - prometheus: Prometheus server on port 9090, configured by
#     ./prometheus.yml.
#   - grafana: dashboards on port 3000, pre-configured with Tempo
#     and Prometheus datasources.
#
# Usage:
#   docker compose -f docker/telemetry/docker-compose.yml up -d
#
# Configure xrpld to export traces by adding to xrpld.cfg:
#   [telemetry]
#   enabled=1
#   endpoint=http://localhost:4318/v1/traces

services:
  # OpenTelemetry Collector: receives spans from xrpld via the OTLP
  # protocol and StatsD metrics (beast::insight) over UDP, batches spans
  # for efficiency, and forwards them to Tempo for storage.
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.121.0
    command: ["--config=/etc/otel-collector-config.yaml"]
    ports:
      - "4317:4317"  # OTLP gRPC
      - "4318:4318"  # OTLP HTTP
      - "8125:8125/udp"  # StatsD UDP (beast::insight metrics)
      - "8889:8889"  # Prometheus metrics (spanmetrics + statsd)
      - "13133:13133"  # Health check
    volumes:
      # Mount collector pipeline config (receivers → processors → exporters)
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    # Start Tempo first so the collector has somewhere to export spans.
    depends_on:
      - tempo
    networks:
      - xrpld-telemetry

  # Grafana Tempo: distributed tracing backend that stores and indexes
  # spans. Queryable via TraceQL in Grafana Explore.
  tempo:
    image: grafana/tempo:2.7.2
    command: ["-config.file=/etc/tempo.yaml"]
    ports:
      - "3200:3200"  # Tempo HTTP API (health check, query)
    volumes:
      # Mount Tempo storage and ingestion config
      - ./tempo.yaml:/etc/tempo.yaml:ro
      # Persistent volume for trace data (WAL + blocks)
      - tempo-data:/var/tempo
    networks:
      - xrpld-telemetry

  # Prometheus: metrics backend for the stack. Its scrape configuration
  # comes from ./prometheus.yml (expected to target the collector's
  # Prometheus exporter on port 8889 — confirm against that file).
  prometheus:
    # Pinned to a fixed release, like every other image in this file, so
    # the stack is reproducible. Bump deliberately rather than tracking
    # the moving "latest" tag.
    image: prom/prometheus:v3.2.1
    ports:
      - "9090:9090"  # Prometheus web UI and HTTP API
    volumes:
      # Mount the Prometheus server config
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    # Start the collector first; it publishes the metrics endpoint above.
    depends_on:
      - otel-collector
    networks:
      - xrpld-telemetry

  # Grafana: visualization UI with provisioning and dashboards mounted
  # from ./grafana. Anonymous admin access is enabled for local
  # development convenience — do not expose port 3000 on an untrusted
  # network with these settings.
  grafana:
    image: grafana/grafana:11.5.2
    environment:
      - GF_AUTH_ANONYMOUS_ENABLED=true  # No login required for local dev
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin  # Full access without auth
    ports:
      - "3000:3000"  # Grafana web UI
    volumes:
      # Provisioning files (e.g. datasources) loaded by Grafana on startup
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      # Dashboard JSON files made available inside the container
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    # Both backends are started before Grafana.
    depends_on:
      - tempo
      - prometheus
    networks:
      - xrpld-telemetry

# Named volume backing /var/tempo in the tempo service (WAL and
# compacted blocks). Trace data survives container restarts; to delete
# it along with the stack, run:
#   docker compose -f docker/telemetry/docker-compose.yml down -v
volumes:
  tempo-data: {}

# Dedicated bridge network joined by every service in this file, so they
# can reach one another by name (e.g., the collector reaches Tempo at
# http://tempo:4317).
networks:
  xrpld-telemetry:
    driver: bridge