# Docker Compose workload harness for Phase 10 telemetry validation.
#
# Runs a 5-node validator cluster with full OTel telemetry stack:
#   - 5 rippled validator nodes (consensus network)
#   - OTel Collector (traces + StatsD metrics)
#   - Jaeger (trace search UI)
#   - Tempo (production trace backend)
#   - Prometheus (metrics)
#   - Loki (log aggregation for log-trace correlation)
#   - Grafana (dashboards + trace/log exploration)
#
# Usage:
#   # Start the harness (requires pre-built xrpld image or mount binary):
#   docker compose -f docker/telemetry/docker-compose.workload.yaml up -d
#
#   # Or use the orchestrator:
#   docker/telemetry/workload/run-full-validation.sh
#
# Prerequisites:
#   - xrpld binary built with -DXRPL_ENABLE_TELEMETRY=ON
#   - Validator keys generated via generate-validator-keys.sh
#   - Node configs generated by run-full-validation.sh
#
# Healthcheck note: probes run *inside* the container, so each one may only
# use tools that the image actually ships. Verify the probe binary is still
# present whenever an image tag is changed.

version: "3.8"

services:
  # ---------------------------------------------------------------------------
  # Telemetry Backend Stack
  # ---------------------------------------------------------------------------

  # Receives OTLP traces and StatsD metrics and fans them out to the backends
  # configured in ../otel-collector-config.yaml.
  otel-collector:
    image: otel/opentelemetry-collector-contrib:latest
    command: ["--config=/etc/otel-collector-config.yaml"]
    ports:
      - "4317:4317"  # OTLP gRPC
      - "4318:4318"  # OTLP HTTP
      - "8125:8125/udp"  # StatsD UDP (beast::insight metrics)
      - "8889:8889"  # Prometheus metrics endpoint
      - "13133:13133"  # Health check
    volumes:
      - ../otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
    depends_on:
      - jaeger
      - tempo
    networks:
      - workload-net
    # No in-container healthcheck: the upstream collector image is built from
    # scratch (no shell, curl or wget), so an exec-based probe can never pass
    # and would leave the container permanently "unhealthy". Probe readiness
    # from the host instead: curl -f http://localhost:13133/

  jaeger:
    image: jaegertracing/all-in-one:latest
    environment:
      - COLLECTOR_OTLP_ENABLED=true
    ports:
      - "16686:16686"  # Jaeger UI
      - "14250:14250"  # gRPC
    networks:
      - workload-net
    healthcheck:
      # BusyBox wget (GET, body discarded); the image does not provide curl.
      test:
        - CMD
        - wget
        - -q
        - -O
        - /dev/null
        - http://localhost:16686/
      interval: 5s
      timeout: 3s
      retries: 10

  tempo:
    image: grafana/tempo:2.7.2
    command: ["-config.file=/etc/tempo.yaml"]
    ports:
      - "3200:3200"  # Tempo HTTP API
    volumes:
      - ../tempo.yaml:/etc/tempo.yaml:ro
      - tempo-data:/var/tempo
    networks:
      - workload-net

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ../prometheus.yml:/etc/prometheus/prometheus.yml:ro
    depends_on:
      # The collector defines no healthcheck (see above), so only wait for it
      # to start; Prometheus retries failed scrapes on its own.
      otel-collector:
        condition: service_started
    networks:
      - workload-net
    healthcheck:
      # BusyBox wget (GET, body discarded); the image does not provide curl.
      test:
        - CMD
        - wget
        - -q
        - -O
        - /dev/null
        - http://localhost:9090/-/healthy
      interval: 5s
      timeout: 3s
      retries: 10

  loki:
    image: grafana/loki:2.9.4
    ports:
      - "3100:3100"  # Loki HTTP API
    command: ["-config.file=/etc/loki/local-config.yaml"]
    networks:
      - workload-net
    healthcheck:
      # BusyBox wget (GET, body discarded); the image does not provide curl.
      test:
        - CMD
        - wget
        - -q
        - -O
        - /dev/null
        - http://localhost:3100/ready
      interval: 5s
      timeout: 3s
      retries: 10

  grafana:
    image: grafana/grafana:latest
    environment:
      # Anonymous Admin access is for this local test harness only; never
      # expose this stack on an untrusted network.
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
    ports:
      - "3000:3000"
    volumes:
      - ../grafana/provisioning:/etc/grafana/provisioning:ro
      - ../grafana/dashboards:/var/lib/grafana/dashboards:ro
    depends_on:
      - jaeger
      - tempo
      - prometheus
      - loki
    networks:
      - workload-net
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"]
      interval: 5s
      timeout: 3s
      retries: 10

volumes:
  tempo-data:

networks:
  workload-net:
    driver: bridge