Files
rippled/docker/telemetry/docker-compose.workload.yaml
Pratik Mankawde a142a700e8 refactor(telemetry): migrate Phase 10 validation from Jaeger to Tempo native API
Migrate validate_telemetry.py to Tempo TraceQL search API, remove
Jaeger service from workload docker-compose, update readiness checks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-31 22:32:02 +01:00

103 lines
3.0 KiB
YAML

# Docker Compose workload harness for Phase 10 telemetry validation.
#
# Runs a 5-node validator cluster with full OTel telemetry stack:
# - 5 rippled validator nodes (consensus network)
# - OTel Collector (traces + StatsD metrics)
# - Tempo (trace backend + search API)
# - Prometheus (metrics)
# - Loki (log aggregation for log-trace correlation)
# - Grafana (dashboards + trace/log exploration)
#
# Usage:
# # Start the harness (requires pre-built xrpld image or mount binary):
# docker compose -f docker/telemetry/docker-compose.workload.yaml up -d
#
# # Or use the orchestrator:
# docker/telemetry/workload/run-full-validation.sh
#
# Prerequisites:
# - xrpld binary built with -DXRPL_ENABLE_TELEMETRY=ON
# - Validator keys generated via generate-validator-keys.sh
# - Node configs generated by run-full-validation.sh
#
# Note: No Docker healthchecks are defined here. The orchestrator script
# (run-full-validation.sh) polls each service endpoint directly from the
# host, which avoids issues with missing curl/wget in container images.
services:
# ---------------------------------------------------------------------------
# Telemetry Backend Stack
# ---------------------------------------------------------------------------
otel-collector:
image: otel/opentelemetry-collector-contrib:latest
command: ["--config=/etc/otel-collector-config.yaml"]
ports:
- "4317:4317" # OTLP gRPC
- "4318:4318" # OTLP HTTP
- "8125:8125/udp" # StatsD UDP (beast::insight metrics)
- "8889:8889" # Prometheus metrics endpoint
- "13133:13133" # Health check
volumes:
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
# Mount the validation workdir so filelog receiver can tail node logs.
- /tmp/xrpld-validation:/var/log/rippled:ro
depends_on:
- tempo
networks:
- workload-net
tempo:
image: grafana/tempo:2.7.2
command: ["-config.file=/etc/tempo.yaml"]
ports:
- "3200:3200" # Tempo HTTP API
volumes:
- ./tempo.yaml:/etc/tempo.yaml:ro
- tempo-data:/var/tempo
networks:
- workload-net
prometheus:
image: prom/prometheus:latest
ports:
- "9090:9090"
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
depends_on:
- otel-collector
networks:
- workload-net
loki:
image: grafana/loki:3.4.2
ports:
- "3100:3100" # Loki HTTP API
command: ["-config.file=/etc/loki/local-config.yaml"]
networks:
- workload-net
grafana:
image: grafana/grafana:latest
environment:
- GF_AUTH_ANONYMOUS_ENABLED=true
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
ports:
- "3000:3000"
volumes:
- ./grafana/provisioning:/etc/grafana/provisioning:ro
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro
depends_on:
- tempo
- prometheus
- loki
networks:
- workload-net
volumes:
tempo-data:
networks:
workload-net:
driver: bridge