mirror of
https://github.com/XRPLF/rippled.git
synced 2026-04-29 15:37:57 +00:00
Add the observability stack deployment infrastructure and integration test framework for verifying end-to-end trace export. - Add Grafana dashboards: RPC performance, transaction overview, consensus health (pre-provisioned via dashboards.yaml) - Add Prometheus config for spanmetrics collection from OTel Collector - Update OTel Collector config with spanmetrics connector and prometheus exporter for RED metrics - Add docker-compose services: prometheus, dashboard provisioning - Add integration-test.sh with Tempo API-based span verification (replaces previous Jaeger-based approach) - Add TESTING.md with step-by-step deployment and verification guide - Add telemetry-runbook.md for production operations reference - Add xrpld-telemetry.cfg sample configuration - Add toDisplayString() for ConsensusMode (human-readable span values) - Update Phase 2/3 task lists with known issues sections - Add Phase 5 integration test task list - Add TraceContext protobuf fields for future relay propagation - Wire telemetry lifecycle (setServiceInstanceId/start/stop) in Application.cpp Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
95 lines
3.2 KiB
YAML
95 lines
3.2 KiB
YAML
# Docker Compose stack for xrpld OpenTelemetry observability.
#
# Provides services for local development:
#   - otel-collector: receives OTLP traces from xrpld, batches and
#     forwards them to Tempo. Listens on ports 4317 (gRPC)
#     and 4318 (HTTP).
#   - tempo: Grafana Tempo tracing backend, queryable via Grafana Explore
#     on port 3000. Recommended for production (S3/GCS storage, TraceQL).
#   - prometheus: metrics store on port 9090 for the span-derived
#     (spanmetrics) metrics exposed by otel-collector on port 8889.
#   - grafana: dashboards on port 3000, pre-configured with Tempo
#     and Prometheus datasources.
#
# Usage:
#   docker compose -f docker/telemetry/docker-compose.yml up -d
#
# Configure xrpld to export traces by adding to xrpld.cfg:
#   [telemetry]
#   enabled=1
#   endpoint=http://localhost:4318/v1/traces
|
|
|
|
services:
  # OpenTelemetry Collector: receives spans from xrpld via OTLP protocol,
  # batches them for efficiency, and forwards to Tempo for storage.
  otel-collector:
    image: otel/opentelemetry-collector-contrib:0.121.0
    command: ["--config=/etc/otel-collector-config.yaml"]
    ports:
      - "4317:4317" # OTLP gRPC receiver
      - "4318:4318" # OTLP HTTP receiver (xrpld sends traces here)
      - "8889:8889" # Prometheus metrics (spanmetrics)
      - "13133:13133" # Health check endpoint
    volumes:
      # Mount collector pipeline config (receivers → processors → exporters)
      - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    depends_on:
      - tempo
    networks:
      - xrpld-telemetry

  # Grafana Tempo: distributed tracing backend that stores and indexes
  # spans. Queryable via TraceQL in Grafana Explore.
  tempo:
    image: grafana/tempo:2.7.2
    command: ["-config.file=/etc/tempo.yaml"]
    ports:
      - "3200:3200" # Tempo HTTP API (health check, query)
    volumes:
      # Mount Tempo storage and ingestion config
      - ./tempo.yaml:/etc/tempo.yaml:ro
      # Persistent volume for trace data (WAL + blocks)
      - tempo-data:/var/tempo
    networks:
      - xrpld-telemetry

  # Prometheus: metrics backend for the span-derived (spanmetrics) metrics
  # that the collector exposes on port 8889. Scrape targets are defined in
  # the mounted prometheus.yml.
  prometheus:
    # Pinned to a fixed release (like the other images in this stack) so
    # the environment is reproducible; bump deliberately when upgrading.
    image: prom/prometheus:v3.2.1
    ports:
      - "9090:9090" # Prometheus web UI and HTTP API
    volumes:
      # Mount scrape configuration
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    depends_on:
      - otel-collector
    networks:
      - xrpld-telemetry

  # Grafana: visualization UI with datasources and dashboards provisioned
  # from the mounted directories below.
  # Anonymous admin access enabled for local development convenience;
  # do not expose this service on an untrusted network.
  grafana:
    image: grafana/grafana:11.5.2
    environment:
      GF_AUTH_ANONYMOUS_ENABLED: "true" # No login required for local dev
      GF_AUTH_ANONYMOUS_ORG_ROLE: Admin # Full access without auth
    ports:
      - "3000:3000" # Grafana web UI
    volumes:
      # Auto-provision datasources and dashboard providers on startup
      - ./grafana/provisioning:/etc/grafana/provisioning:ro
      # Dashboard definitions mounted into Grafana's data directory
      - ./grafana/dashboards:/var/lib/grafana/dashboards:ro
    depends_on:
      - tempo
      - prometheus
    networks:
      - xrpld-telemetry
|
|
|
|
# Named volume backing Tempo's trace storage (WAL and compacted blocks).
# Its contents survive container restarts. To delete it, run:
#   docker compose -f docker/telemetry/docker-compose.yml down -v
volumes:
  tempo-data: {}
|
|
|
|
# Dedicated bridge network shared by every service in this stack, letting
# them address one another by name
# (e.g., the collector reaches Tempo at http://tempo:4317).
networks:
  xrpld-telemetry:
    driver: bridge
|