diff --git a/OpenTelemetryPlan/05-configuration-reference.md b/OpenTelemetryPlan/05-configuration-reference.md index b13cc839ab..46f511a8b2 100644 --- a/OpenTelemetryPlan/05-configuration-reference.md +++ b/OpenTelemetryPlan/05-configuration-reference.md @@ -386,12 +386,18 @@ exporters: tls: insecure: true + # Grafana Tempo for trace storage + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true + service: pipelines: traces: receivers: [otlp] processors: [batch] - exporters: [logging, jaeger] + exporters: [logging, jaeger, otlp/tempo] ``` ### 5.5.2 Production Configuration @@ -533,6 +539,17 @@ services: - "16686:16686" # UI - "14250:14250" # gRPC + # Grafana Tempo for trace storage (recommended for production) + tempo: + image: grafana/tempo:2.7.2 + container_name: tempo + command: ["-config.file=/etc/tempo.yaml"] + volumes: + - ./tempo.yaml:/etc/tempo.yaml:ro + - tempo-data:/var/tempo + ports: + - "3200:3200" # HTTP API + # Grafana for dashboards grafana: image: grafana/grafana:10.2.3 @@ -547,6 +564,7 @@ services: - "3000:3000" depends_on: - jaeger + - tempo # Prometheus for metrics (optional, for correlation) prometheus: diff --git a/docker/telemetry/docker-compose.yml b/docker/telemetry/docker-compose.yml index e6780ac2c3..491a3c78e7 100644 --- a/docker/telemetry/docker-compose.yml +++ b/docker/telemetry/docker-compose.yml @@ -1,10 +1,14 @@ # Docker Compose stack for rippled OpenTelemetry observability. # -# Provides three services for local development: +# Provides services for local development: # - otel-collector: receives OTLP traces from rippled, batches and -# forwards them to Jaeger. Listens on ports 4317 (gRPC) and 4318 (HTTP). +# forwards them to Jaeger and Tempo. Listens on ports 4317 (gRPC) +# and 4318 (HTTP). # - jaeger: all-in-one tracing backend with UI on port 16686. -# - grafana: dashboards on port 3000, pre-configured with Jaeger datasource. +# - tempo: Grafana Tempo tracing backend, queryable via Grafana Explore +# on port 3000. 
Recommended for production (S3/GCS storage, TraceQL). +# - grafana: dashboards on port 3000, pre-configured with Jaeger and Tempo +# datasources. # # Usage: # docker compose -f docker/telemetry/docker-compose.yml up -d @@ -28,6 +32,7 @@ services: - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro depends_on: - jaeger + - tempo networks: - rippled-telemetry @@ -41,6 +46,17 @@ services: networks: - rippled-telemetry + tempo: + image: grafana/tempo:2.7.2 + command: ["-config.file=/etc/tempo.yaml"] + ports: + - "3200:3200" # Tempo HTTP API (health, query) + volumes: + - ./tempo.yaml:/etc/tempo.yaml:ro + - tempo-data:/var/tempo + networks: + - rippled-telemetry + grafana: image: grafana/grafana:latest environment: @@ -52,9 +68,13 @@ services: - ./grafana/provisioning:/etc/grafana/provisioning:ro depends_on: - jaeger + - tempo networks: - rippled-telemetry +volumes: + tempo-data: + networks: rippled-telemetry: driver: bridge diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml new file mode 100644 index 0000000000..0cf9dbd6b0 --- /dev/null +++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml @@ -0,0 +1,16 @@ +# Grafana datasource provisioning for Grafana Tempo. +# Auto-configures Tempo as a trace data source on Grafana startup. +# Access Grafana at http://localhost:3000, then use Explore -> Tempo +# to browse rippled traces using TraceQL. + +apiVersion: 1 + +datasources: + - name: Tempo + type: tempo + access: proxy + url: http://tempo:3200 + uid: tempo + jsonData: + nodeGraph: + enabled: true diff --git a/docker/telemetry/otel-collector-config.yaml b/docker/telemetry/otel-collector-config.yaml index 0231023969..61937af6b1 100644 --- a/docker/telemetry/otel-collector-config.yaml +++ b/docker/telemetry/otel-collector-config.yaml @@ -1,8 +1,10 @@ # OpenTelemetry Collector configuration for rippled development. 
# -# Pipeline: OTLP receiver -> batch processor -> debug exporter + Jaeger. +# Pipeline: OTLP receiver -> batch processor -> debug + Jaeger + Tempo. # rippled sends traces via OTLP/HTTP to port 4318. The collector batches -# them and forwards to Jaeger via OTLP/gRPC on the Docker network. +# them and forwards to both Jaeger and Tempo via OTLP/gRPC on the Docker +# network. Jaeger provides a standalone UI at :16686; Tempo is queryable +# via Grafana Explore using TraceQL. receivers: otlp: @@ -24,10 +26,14 @@ exporters: endpoint: jaeger:4317 tls: insecure: true + otlp/tempo: + endpoint: tempo:4317 + tls: + insecure: true service: pipelines: traces: receivers: [otlp] processors: [batch] - exporters: [debug, otlp/jaeger] + exporters: [debug, otlp/jaeger, otlp/tempo] diff --git a/docker/telemetry/tempo.yaml b/docker/telemetry/tempo.yaml new file mode 100644 index 0000000000..da5f70ef09 --- /dev/null +++ b/docker/telemetry/tempo.yaml @@ -0,0 +1,36 @@ +# Grafana Tempo configuration for rippled telemetry stack. +# +# Runs in single-binary mode for local development. +# Receives traces via OTLP/gRPC from the OTel Collector and stores +# them locally. Queryable via Grafana Explore using the Tempo datasource. +# +# For production, replace local storage with S3/GCS backend and adjust +# retention via the compactor settings. See: +# https://grafana.com/docs/tempo/latest/configuration/ + +stream_over_http_enabled: true + +server: + http_listen_port: 3200 + +distributor: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + +ingester: + max_block_duration: 5m + +compactor: + compaction: + block_retention: 1h + +storage: + trace: + backend: local + wal: + path: /var/tempo/wal + local: + path: /var/tempo/blocks