mirror of
https://github.com/XRPLF/rippled.git
synced 2026-03-15 09:12:25 +00:00
Phase 1b: Telemetry core infrastructure
Add the OpenTelemetry telemetry library and supporting infrastructure: Build system: - Conan opentelemetry-cpp dependency with OTLP/gRPC exporter - CMake integration for xrpl_telemetry library target - Levelization ordering updates Core library (libxrpl): - Telemetry class: provider lifecycle, span creation, sampling config - SpanGuard: RAII span management with attribute/exception helpers - TelemetryConfig: parse [telemetry] config section - NullTelemetry: no-op implementation when telemetry is disabled Application integration: - Telemetry member in ApplicationImp with start/stop lifecycle - getTelemetry() interface on Application - ServiceRegistry telemetry accessor Docker observability stack: - OTel Collector, Jaeger, Grafana docker-compose setup - Collector config with OTLP gRPC receiver and Jaeger exporter Config and docs: - Example telemetry config section in xrpld-example.cfg - Build documentation for telemetry setup Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -32,6 +32,8 @@ libxrpl.server > xrpl.server
|
||||
libxrpl.shamap > xrpl.basics
|
||||
libxrpl.shamap > xrpl.protocol
|
||||
libxrpl.shamap > xrpl.shamap
|
||||
libxrpl.telemetry > xrpl.basics
|
||||
libxrpl.telemetry > xrpl.telemetry
|
||||
libxrpl.tx > xrpl.basics
|
||||
libxrpl.tx > xrpl.conditions
|
||||
libxrpl.tx > xrpl.core
|
||||
@@ -206,6 +208,7 @@ xrpl.server > xrpl.shamap
|
||||
xrpl.shamap > xrpl.basics
|
||||
xrpl.shamap > xrpl.nodestore
|
||||
xrpl.shamap > xrpl.protocol
|
||||
xrpl.telemetry > xrpl.basics
|
||||
xrpl.tx > xrpl.basics
|
||||
xrpl.tx > xrpl.core
|
||||
xrpl.tx > xrpl.ledger
|
||||
@@ -224,6 +227,7 @@ xrpld.app > xrpl.rdb
|
||||
xrpld.app > xrpl.resource
|
||||
xrpld.app > xrpl.server
|
||||
xrpld.app > xrpl.shamap
|
||||
xrpld.app > xrpl.telemetry
|
||||
xrpld.app > xrpl.tx
|
||||
xrpld.consensus > xrpl.basics
|
||||
xrpld.consensus > xrpl.json
|
||||
|
||||
@@ -117,6 +117,18 @@ if(rocksdb)
|
||||
target_link_libraries(xrpl_libs INTERFACE RocksDB::rocksdb)
|
||||
endif()
|
||||
|
||||
# OpenTelemetry distributed tracing (optional).
|
||||
# When ON, links against opentelemetry-cpp and defines XRPL_ENABLE_TELEMETRY
|
||||
# so that tracing macros in TracingInstrumentation.h are compiled in.
|
||||
# When OFF (default), all tracing code compiles to no-ops with zero overhead.
|
||||
# Enable via: conan install -o telemetry=True, or cmake -Dtelemetry=ON.
|
||||
option(telemetry "Enable OpenTelemetry tracing" OFF)
|
||||
if (telemetry)
|
||||
find_package(opentelemetry-cpp CONFIG REQUIRED)
|
||||
add_compile_definitions(XRPL_ENABLE_TELEMETRY)
|
||||
message(STATUS "OpenTelemetry tracing enabled")
|
||||
endif ()
|
||||
|
||||
# Work around changes to Conan recipe for now.
|
||||
if(TARGET nudb::core)
|
||||
set(nudb nudb::core)
|
||||
|
||||
@@ -1529,3 +1529,46 @@ validators.txt
|
||||
# set to ssl_verify to 0.
|
||||
[ssl_verify]
|
||||
1
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# 11. Telemetry (OpenTelemetry Tracing)
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# Enables distributed tracing via OpenTelemetry. Requires building with
|
||||
# -DXRPL_ENABLE_TELEMETRY=ON (telemetry Conan option).
|
||||
#
|
||||
# [telemetry]
|
||||
#
|
||||
# enabled=0
|
||||
#
|
||||
# Enable or disable telemetry at runtime. Default: 0 (disabled).
|
||||
#
|
||||
# endpoint=http://localhost:4318/v1/traces
|
||||
#
|
||||
# The OpenTelemetry Collector endpoint (OTLP/HTTP). Default: http://localhost:4318/v1/traces.
|
||||
#
|
||||
# exporter=otlp_http
|
||||
#
|
||||
# Exporter type: otlp_http. Default: otlp_http.
|
||||
#
|
||||
# sampling_ratio=1.0
|
||||
#
|
||||
# Fraction of traces to sample (0.0 to 1.0). Default: 1.0 (all traces).
|
||||
#
|
||||
# trace_rpc=1
|
||||
#
|
||||
# Enable RPC request tracing. Default: 1.
|
||||
#
|
||||
# trace_transactions=1
|
||||
#
|
||||
# Enable transaction lifecycle tracing. Default: 1.
|
||||
#
|
||||
# trace_consensus=1
|
||||
#
|
||||
# Enable consensus round tracing. Default: 1.
|
||||
#
|
||||
# trace_peer=0
|
||||
#
|
||||
# Enable peer message tracing (high volume). Default: 0.
|
||||
#
|
||||
|
||||
@@ -180,6 +180,17 @@ target_link_libraries(
|
||||
add_module(xrpl tx)
|
||||
target_link_libraries(xrpl.libxrpl.tx PUBLIC xrpl.libxrpl.ledger)
|
||||
|
||||
# Telemetry module — OpenTelemetry distributed tracing support.
|
||||
# Sources: include/xrpl/telemetry/ (headers), src/libxrpl/telemetry/ (impl).
|
||||
# When telemetry=ON, links the Conan-provided umbrella target
|
||||
# opentelemetry-cpp::opentelemetry-cpp (individual component targets like
|
||||
# ::api, ::sdk are not available in the Conan package).
|
||||
add_module(xrpl telemetry)
|
||||
target_link_libraries(xrpl.libxrpl.telemetry PUBLIC xrpl.libxrpl.basics xrpl.libxrpl.beast)
|
||||
if (telemetry)
|
||||
target_link_libraries(xrpl.libxrpl.telemetry PUBLIC opentelemetry-cpp::opentelemetry-cpp)
|
||||
endif ()
|
||||
|
||||
add_library(xrpl.libxrpl)
|
||||
set_target_properties(xrpl.libxrpl PROPERTIES OUTPUT_NAME xrpl)
|
||||
|
||||
@@ -210,6 +221,7 @@ target_link_modules(
|
||||
resource
|
||||
server
|
||||
shamap
|
||||
telemetry
|
||||
tx
|
||||
)
|
||||
|
||||
|
||||
11
conan.lock
11
conan.lock
@@ -10,10 +10,13 @@
|
||||
"rocksdb/10.5.1#4a197eca381a3e5ae8adf8cffa5aacd0%1765850186.86",
|
||||
"re2/20230301#ca3b241baec15bd31ea9187150e0b333%1765850148.103",
|
||||
"protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038",
|
||||
"openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1769599205.414",
|
||||
"opentelemetry-cpp/1.18.0#efd9851e173f8a13b9c7d35232de8cf1%1750409186.472",
|
||||
"openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1770229825.601",
|
||||
"nudb/2.0.9#0432758a24204da08fee953ec9ea03cb%1769436073.32",
|
||||
"nlohmann_json/3.11.3#45828be26eb619a2e04ca517bb7b828d%1701220705.259",
|
||||
"lz4/1.10.0#59fc63cac7f10fbe8e05c7e62c2f3504%1765850143.914",
|
||||
"libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1765842973.492",
|
||||
"libcurl/8.18.0#364bc3755cb9ef84ed9a7ae9c7efc1c1%1770984390.024",
|
||||
"libbacktrace/cci.20210118#a7691bfccd8caaf66309df196790a5a1%1765842973.03",
|
||||
"libarchive/3.8.1#ffee18995c706e02bf96e7a2f7042e0d%1765850144.736",
|
||||
"jemalloc/5.3.0#e951da9cf599e956cebc117880d2d9f8%1729241615.244",
|
||||
@@ -30,9 +33,15 @@
|
||||
"zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1765850150.075",
|
||||
"strawberryperl/5.32.1.1#707032463aa0620fa17ec0d887f5fe41%1765850165.196",
|
||||
"protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038",
|
||||
"pkgconf/2.5.1#93c2051284cba1279494a43a4fcfeae2%1757684701.089",
|
||||
"opentelemetry-proto/1.4.0#4096a3b05916675ef9628f3ffd571f51%1732731336.11",
|
||||
"ninja/1.13.2#c8c5dc2a52ed6e4e42a66d75b4717ceb%1764096931.974",
|
||||
"nasm/2.16.01#31e26f2ee3c4346ecd347911bd126904%1765850144.707",
|
||||
"msys2/cci.latest#eea83308ad7e9023f7318c60d5a9e6cb%1770199879.083",
|
||||
"meson/1.10.0#60786758ea978964c24525de19603cf4%1768294926.103",
|
||||
"m4/1.4.19#70dc8bbb33e981d119d2acc0175cf381%1763158052.846",
|
||||
"libtool/2.4.7#14e7739cc128bc1623d2ed318008e47e%1755679003.847",
|
||||
"gnu-config/cci.20210814#466e9d4d7779e1c142443f7ea44b4284%1762363589.329",
|
||||
"cmake/4.2.0#ae0a44f44a1ef9ab68fd4b3e9a1f8671%1765850153.937",
|
||||
"cmake/3.31.10#313d16a1aa16bbdb2ca0792467214b76%1765850153.479",
|
||||
"b2/5.3.3#107c15377719889654eb9a162a673975%1765850144.355",
|
||||
|
||||
@@ -22,6 +22,7 @@ class Xrpl(ConanFile):
|
||||
"rocksdb": [True, False],
|
||||
"shared": [True, False],
|
||||
"static": [True, False],
|
||||
"telemetry": [True, False],
|
||||
"tests": [True, False],
|
||||
"unity": [True, False],
|
||||
"xrpld": [True, False],
|
||||
@@ -54,6 +55,7 @@ class Xrpl(ConanFile):
|
||||
"rocksdb": True,
|
||||
"shared": False,
|
||||
"static": True,
|
||||
"telemetry": True,
|
||||
"tests": False,
|
||||
"unity": False,
|
||||
"xrpld": False,
|
||||
@@ -140,6 +142,10 @@ class Xrpl(ConanFile):
|
||||
self.requires("jemalloc/5.3.0")
|
||||
if self.options.rocksdb:
|
||||
self.requires("rocksdb/10.5.1")
|
||||
# OpenTelemetry C++ SDK for distributed tracing (optional).
|
||||
# Provides OTLP/HTTP exporter, batch span processor, and trace API.
|
||||
if self.options.telemetry:
|
||||
self.requires("opentelemetry-cpp/1.18.0")
|
||||
self.requires("xxhash/0.8.3", **transitive_headers_opt)
|
||||
|
||||
exports_sources = (
|
||||
@@ -168,6 +174,7 @@ class Xrpl(ConanFile):
|
||||
tc.variables["rocksdb"] = self.options.rocksdb
|
||||
tc.variables["BUILD_SHARED_LIBS"] = self.options.shared
|
||||
tc.variables["static"] = self.options.static
|
||||
tc.variables["telemetry"] = self.options.telemetry
|
||||
tc.variables["unity"] = self.options.unity
|
||||
tc.variables["xrpld"] = self.options.xrpld
|
||||
tc.generate()
|
||||
@@ -220,3 +227,5 @@ class Xrpl(ConanFile):
|
||||
]
|
||||
if self.options.rocksdb:
|
||||
libxrpl.requires.append("rocksdb::librocksdb")
|
||||
if self.options.telemetry:
|
||||
libxrpl.requires.append("opentelemetry-cpp::opentelemetry-cpp")
|
||||
|
||||
60
docker/telemetry/docker-compose.yml
Normal file
60
docker/telemetry/docker-compose.yml
Normal file
@@ -0,0 +1,60 @@
|
||||
# Docker Compose stack for rippled OpenTelemetry observability.
|
||||
#
|
||||
# Provides three services for local development:
|
||||
# - otel-collector: receives OTLP traces from rippled, batches and
|
||||
# forwards them to Jaeger. Listens on ports 4317 (gRPC) and 4318 (HTTP).
|
||||
# - jaeger: all-in-one tracing backend with UI on port 16686.
|
||||
# - grafana: dashboards on port 3000, pre-configured with Jaeger datasource.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker/telemetry/docker-compose.yml up -d
|
||||
#
|
||||
# Configure rippled to export traces by adding to xrpld.cfg:
|
||||
# [telemetry]
|
||||
# enabled=1
|
||||
# endpoint=http://localhost:4318/v1/traces
|
||||
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:latest
|
||||
command: ["--config=/etc/otel-collector-config.yaml"]
|
||||
ports:
|
||||
- "4317:4317" # OTLP gRPC
|
||||
- "4318:4318" # OTLP HTTP
|
||||
- "13133:13133" # Health check
|
||||
volumes:
|
||||
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
|
||||
depends_on:
|
||||
- jaeger
|
||||
networks:
|
||||
- rippled-telemetry
|
||||
|
||||
jaeger:
|
||||
image: jaegertracing/all-in-one:latest
|
||||
environment:
|
||||
- COLLECTOR_OTLP_ENABLED=true
|
||||
ports:
|
||||
- "16686:16686" # Jaeger UI
|
||||
- "14250:14250" # gRPC
|
||||
networks:
|
||||
- rippled-telemetry
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
environment:
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=true
|
||||
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
depends_on:
|
||||
- jaeger
|
||||
networks:
|
||||
- rippled-telemetry
|
||||
|
||||
networks:
|
||||
rippled-telemetry:
|
||||
driver: bridge
|
||||
@@ -0,0 +1,12 @@
|
||||
# Grafana datasource provisioning for the rippled telemetry stack.
|
||||
# Auto-configures Jaeger as a trace data source on Grafana startup.
|
||||
# Access Grafana at http://localhost:3000, then use Explore -> Jaeger
|
||||
# to browse rippled traces.
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Jaeger
|
||||
type: jaeger
|
||||
access: proxy
|
||||
url: http://jaeger:16686
|
||||
33
docker/telemetry/otel-collector-config.yaml
Normal file
33
docker/telemetry/otel-collector-config.yaml
Normal file
@@ -0,0 +1,33 @@
|
||||
# OpenTelemetry Collector configuration for rippled development.
|
||||
#
|
||||
# Pipeline: OTLP receiver -> batch processor -> debug exporter + Jaeger.
|
||||
# rippled sends traces via OTLP/HTTP to port 4318. The collector batches
|
||||
# them and forwards to Jaeger via OTLP/gRPC on the Docker network.
|
||||
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 1s
|
||||
send_batch_size: 100
|
||||
|
||||
exporters:
|
||||
debug:
|
||||
verbosity: detailed
|
||||
otlp/jaeger:
|
||||
endpoint: jaeger:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [debug, otlp/jaeger]
|
||||
278
docs/build/telemetry.md
vendored
Normal file
278
docs/build/telemetry.md
vendored
Normal file
@@ -0,0 +1,278 @@
|
||||
# OpenTelemetry Tracing for Rippled
|
||||
|
||||
This document explains how to build rippled with OpenTelemetry distributed tracing support, configure the runtime telemetry options, and set up the observability backend to view traces.
|
||||
|
||||
- [OpenTelemetry Tracing for Rippled](#opentelemetry-tracing-for-rippled)
|
||||
- [Overview](#overview)
|
||||
- [Building with Telemetry](#building-with-telemetry)
|
||||
- [Summary](#summary)
|
||||
- [Build steps](#build-steps)
|
||||
- [Install dependencies](#install-dependencies)
|
||||
- [Call CMake](#call-cmake)
|
||||
- [Build](#build)
|
||||
- [Building without telemetry](#building-without-telemetry)
|
||||
- [Runtime Configuration](#runtime-configuration)
|
||||
- [Configuration options](#configuration-options)
|
||||
- [Observability Stack](#observability-stack)
|
||||
- [Start the stack](#start-the-stack)
|
||||
- [Verify the stack](#verify-the-stack)
|
||||
- [View traces in Jaeger](#view-traces-in-jaeger)
|
||||
- [Running Tests](#running-tests)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [No traces appear in Jaeger](#no-traces-appear-in-jaeger)
|
||||
- [Conan lockfile error](#conan-lockfile-error)
|
||||
- [CMake target not found](#cmake-target-not-found)
|
||||
- [Architecture](#architecture)
|
||||
- [Key files](#key-files)
|
||||
- [Conditional compilation](#conditional-compilation)
|
||||
|
||||
## Overview
|
||||
|
||||
Rippled supports optional [OpenTelemetry](https://opentelemetry.io/) distributed tracing.
|
||||
When enabled, it instruments RPC requests with trace spans that are exported via
|
||||
OTLP/HTTP to an OpenTelemetry Collector, which forwards them to a tracing backend
|
||||
such as Jaeger.
|
||||
|
||||
Telemetry is **off by default** at both compile time and runtime:
|
||||
|
||||
- **Compile time**: The Conan option `telemetry` and CMake option `telemetry` must be set to `True`/`ON`.
|
||||
When disabled, all tracing macros compile to `((void)0)` with zero overhead.
|
||||
- **Runtime**: The `[telemetry]` config section must set `enabled=1`.
|
||||
When disabled at runtime, a no-op implementation is used.
|
||||
|
||||
## Building with Telemetry
|
||||
|
||||
### Summary
|
||||
|
||||
Follow the same instructions as mentioned in [BUILD.md](../../BUILD.md) but with the following changes:
|
||||
|
||||
1. Pass `-o telemetry=True` to `conan install` to pull the `opentelemetry-cpp` dependency.
|
||||
2. CMake will automatically pick up `telemetry=ON` from the Conan-generated toolchain.
|
||||
3. Build as usual.
|
||||
|
||||
---
|
||||
|
||||
### Build steps
|
||||
|
||||
```bash
|
||||
cd /path/to/rippled
|
||||
rm -rf .build
|
||||
mkdir .build
|
||||
cd .build
|
||||
```
|
||||
|
||||
#### Install dependencies
|
||||
|
||||
The `telemetry` option adds `opentelemetry-cpp/1.18.0` as a dependency.
|
||||
If the Conan lockfile does not yet include this package, bypass it with `--lockfile=""`.
|
||||
|
||||
```bash
|
||||
conan install .. \
|
||||
--output-folder . \
|
||||
--build missing \
|
||||
--settings build_type=Debug \
|
||||
-o telemetry=True \
|
||||
-o tests=True \
|
||||
-o xrpld=True \
|
||||
--lockfile=""
|
||||
```
|
||||
|
||||
> **Note**: The first build with telemetry may take longer as `opentelemetry-cpp`
|
||||
> and its transitive dependencies are compiled from source.
|
||||
|
||||
#### Call CMake
|
||||
|
||||
The Conan-generated toolchain file sets `telemetry=ON` automatically.
|
||||
No additional CMake flags are needed beyond the standard ones.
|
||||
|
||||
```bash
|
||||
cmake .. -G Ninja \
|
||||
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-Dtests=ON -Dxrpld=ON
|
||||
```
|
||||
|
||||
You should see in the CMake output:
|
||||
|
||||
```
|
||||
-- OpenTelemetry tracing enabled
|
||||
```
|
||||
|
||||
#### Build
|
||||
|
||||
```bash
|
||||
cmake --build . --parallel $(nproc)
|
||||
```
|
||||
|
||||
### Building without telemetry
|
||||
|
||||
Omit the `-o telemetry=True` option (or pass `-o telemetry=False`).
|
||||
The `opentelemetry-cpp` dependency will not be downloaded,
|
||||
the `XRPL_ENABLE_TELEMETRY` preprocessor define will not be set,
|
||||
and all tracing macros will compile to no-ops.
|
||||
The resulting binary is identical to one built before telemetry support was added.
|
||||
|
||||
## Runtime Configuration
|
||||
|
||||
Add a `[telemetry]` section to your `xrpld.cfg` file:
|
||||
|
||||
```ini
|
||||
[telemetry]
|
||||
enabled=1
|
||||
service_name=rippled
|
||||
endpoint=http://localhost:4318/v1/traces
|
||||
sampling_ratio=1.0
|
||||
trace_rpc=1
|
||||
trace_transactions=1
|
||||
trace_consensus=1
|
||||
trace_peer=0
|
||||
```
|
||||
|
||||
### Configuration options
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
| --------------------- | ------ | --------------------------------- | -------------------------------------------------- |
|
||||
| `enabled` | int | `0` | Enable (`1`) or disable (`0`) telemetry at runtime |
|
||||
| `service_name` | string | `rippled` | Service name reported in traces |
|
||||
| `service_instance_id` | string | node public key | Unique instance identifier |
|
||||
| `exporter` | string | `otlp_http` | Exporter type |
|
||||
| `endpoint` | string | `http://localhost:4318/v1/traces` | OTLP/HTTP collector endpoint |
|
||||
| `use_tls` | int | `0` | Enable TLS for the exporter connection |
|
||||
| `tls_ca_cert` | string | (empty) | Path to CA certificate for TLS |
|
||||
| `sampling_ratio` | double | `1.0` | Fraction of traces to sample (`0.0` to `1.0`) |
|
||||
| `batch_size` | uint32 | `512` | Maximum spans per export batch |
|
||||
| `batch_delay_ms` | uint32 | `5000` | Maximum delay (ms) before flushing a batch |
|
||||
| `max_queue_size` | uint32 | `2048` | Maximum spans queued in memory |
|
||||
| `trace_rpc` | int | `1` | Enable RPC request tracing |
|
||||
| `trace_transactions` | int | `1` | Enable transaction lifecycle tracing |
|
||||
| `trace_consensus` | int | `1` | Enable consensus round tracing |
|
||||
| `trace_peer` | int | `0` | Enable peer message tracing (high volume) |
|
||||
| `trace_ledger` | int | `1` | Enable ledger close tracing |
|
||||
|
||||
## Observability Stack
|
||||
|
||||
A Docker Compose stack is provided in `docker/telemetry/` with three services:
|
||||
|
||||
| Service | Port | Purpose |
|
||||
| ------------------ | ---------------------------------------------- | ---------------------------------------------------- |
|
||||
| **OTel Collector** | `4317` (gRPC), `4318` (HTTP), `13133` (health) | Receives OTLP spans, batches, and forwards to Jaeger |
|
||||
| **Jaeger** | `16686` (UI) | Trace storage and visualization |
|
||||
| **Grafana** | `3000` | Dashboards (Jaeger pre-configured as datasource) |
|
||||
|
||||
### Start the stack
|
||||
|
||||
```bash
|
||||
docker compose -f docker/telemetry/docker-compose.yml up -d
|
||||
```
|
||||
|
||||
### Verify the stack
|
||||
|
||||
```bash
|
||||
# Collector health
|
||||
curl http://localhost:13133
|
||||
|
||||
# Jaeger UI
|
||||
open http://localhost:16686
|
||||
|
||||
# Grafana
|
||||
open http://localhost:3000
|
||||
```
|
||||
|
||||
### View traces in Jaeger
|
||||
|
||||
1. Open `http://localhost:16686` in a browser.
|
||||
2. Select the service name (e.g. `rippled`) from the **Service** dropdown.
|
||||
3. Click **Find Traces**.
|
||||
4. Click into any trace to see the span tree and attributes.
|
||||
|
||||
Traced RPC operations produce a span hierarchy like:
|
||||
|
||||
```
|
||||
rpc.request
|
||||
└── rpc.command.server_info (xrpl.rpc.command=server_info, xrpl.rpc.status=success)
|
||||
```
|
||||
|
||||
Each span includes attributes:
|
||||
|
||||
- `xrpl.rpc.command` — the RPC method name
|
||||
- `xrpl.rpc.version` — API version
|
||||
- `xrpl.rpc.role` — `admin` or `user`
|
||||
- `xrpl.rpc.status` — `success` or `error`
|
||||
|
||||
## Running Tests
|
||||
|
||||
Unit tests run with the telemetry-enabled build regardless of whether the
|
||||
observability stack is running. When no collector is available, the exporter
|
||||
silently drops spans with no impact on test results.
|
||||
|
||||
```bash
|
||||
# Run all RPC tests
|
||||
./xrpld --unittest=RPCCall,ServerInfo,AccountTx,LedgerRPC,Transaction --unittest-jobs $(nproc)
|
||||
|
||||
# Run the full test suite
|
||||
./xrpld --unittest --unittest-jobs $(nproc)
|
||||
```
|
||||
|
||||
To generate traces during manual testing, start rippled in standalone mode:
|
||||
|
||||
```bash
|
||||
./xrpld --conf /path/to/xrpld.cfg --standalone --start
|
||||
```
|
||||
|
||||
Then send RPC requests:
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://127.0.0.1:5005/ \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"method":"server_info","params":[{}]}'
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### No traces appear in Jaeger
|
||||
|
||||
1. Confirm the OTel Collector is running: `docker compose -f docker/telemetry/docker-compose.yml ps`
|
||||
2. Check collector logs for errors: `docker compose -f docker/telemetry/docker-compose.yml logs otel-collector`
|
||||
3. Confirm `[telemetry] enabled=1` is set in the rippled config.
|
||||
4. Confirm `endpoint` points to the correct collector address (`http://localhost:4318/v1/traces`).
|
||||
5. Wait for the batch delay to elapse (default `5000` ms) before checking Jaeger.
|
||||
|
||||
### Conan lockfile error
|
||||
|
||||
If you see `ERROR: Requirement 'opentelemetry-cpp/1.18.0' not in lockfile 'requires'`,
|
||||
the lockfile was generated without the telemetry dependency.
|
||||
Pass `--lockfile=""` to bypass the lockfile, or regenerate it with telemetry enabled.
|
||||
|
||||
### CMake target not found
|
||||
|
||||
If CMake reports that `opentelemetry-cpp` targets are not found,
|
||||
ensure you ran `conan install` with `-o telemetry=True` and that the
|
||||
Conan-generated toolchain file is being used.
|
||||
The Conan package provides a single umbrella target
|
||||
`opentelemetry-cpp::opentelemetry-cpp` (not individual component targets).
|
||||
|
||||
## Architecture
|
||||
|
||||
### Key files
|
||||
|
||||
| File | Purpose |
|
||||
| ---------------------------------------------- | ----------------------------------------------------------- |
|
||||
| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct |
|
||||
| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard (activates scope, ends span on destruction) |
|
||||
| `src/libxrpl/telemetry/Telemetry.cpp` | OTel-backed implementation (`TelemetryImpl`) |
|
||||
| `src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) |
|
||||
| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) |
|
||||
| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) |
|
||||
| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation |
|
||||
| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation |
|
||||
| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Jaeger + Grafana) |
|
||||
| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration |
|
||||
|
||||
### Conditional compilation
|
||||
|
||||
All OpenTelemetry SDK headers are guarded behind `#ifdef XRPL_ENABLE_TELEMETRY`.
|
||||
The instrumentation macros in `TracingInstrumentation.h` compile to `((void)0)` when
|
||||
the define is absent.
|
||||
At runtime, if `enabled=0` is set in config (or the section is omitted), a
|
||||
`NullTelemetry` implementation is used that returns no-op spans.
|
||||
This two-layer approach ensures zero overhead when telemetry is not wanted.
|
||||
@@ -19,6 +19,9 @@ class Manager;
|
||||
namespace perf {
|
||||
class PerfLog;
|
||||
}
|
||||
namespace telemetry {
|
||||
class Telemetry;
|
||||
}
|
||||
|
||||
// This is temporary until we migrate all code to use ServiceRegistry.
|
||||
class Application;
|
||||
@@ -205,6 +208,9 @@ public:
|
||||
virtual perf::PerfLog&
|
||||
getPerfLog() = 0;
|
||||
|
||||
virtual telemetry::Telemetry&
|
||||
getTelemetry() = 0;
|
||||
|
||||
// Configuration and state
|
||||
virtual bool
|
||||
isStopping() const = 0;
|
||||
|
||||
155
include/xrpl/telemetry/SpanGuard.h
Normal file
155
include/xrpl/telemetry/SpanGuard.h
Normal file
@@ -0,0 +1,155 @@
|
||||
#pragma once
|
||||
|
||||
/** RAII guard for OpenTelemetry trace spans.
|
||||
|
||||
Wraps an OTel Span and Scope together. On construction, the span is
|
||||
activated on the current thread's context (via Scope). On destruction,
|
||||
the span is ended and the previous context is restored.
|
||||
|
||||
Used by the XRPL_TRACE_* macros in TracingInstrumentation.h. Can also
|
||||
be stored in std::optional for conditional tracing (move-constructible).
|
||||
|
||||
Only compiled when XRPL_ENABLE_TELEMETRY is defined.
|
||||
*/
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <opentelemetry/context/runtime_context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
|
||||
#include <exception>
|
||||
#include <string_view>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
/** RAII wrapper that activates a span on construction and ends it on
|
||||
destruction. Non-copyable but move-constructible so it can be held
|
||||
in std::optional for conditional tracing.
|
||||
*/
|
||||
class SpanGuard
|
||||
{
|
||||
/** The OTel span being guarded. Set to nullptr after move. */
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span_;
|
||||
|
||||
/** Scope that activates span_ on the current thread's context stack. */
|
||||
opentelemetry::trace::Scope scope_;
|
||||
|
||||
public:
|
||||
/** Construct a guard that activates @p span on the current context.
|
||||
|
||||
@param span The span to guard. Ended in the destructor.
|
||||
*/
|
||||
explicit SpanGuard(opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span)
|
||||
: span_(std::move(span)), scope_(span_)
|
||||
{
|
||||
}
|
||||
|
||||
/** Non-copyable. Move-constructible to support std::optional.
|
||||
|
||||
The move constructor creates a new Scope from the transferred span,
|
||||
because Scope is not movable.
|
||||
*/
|
||||
SpanGuard(SpanGuard const&) = delete;
|
||||
SpanGuard&
|
||||
operator=(SpanGuard const&) = delete;
|
||||
SpanGuard(SpanGuard&& other) noexcept : span_(std::move(other.span_)), scope_(span_)
|
||||
{
|
||||
other.span_ = nullptr;
|
||||
}
|
||||
SpanGuard&
|
||||
operator=(SpanGuard&&) = delete;
|
||||
|
||||
~SpanGuard()
|
||||
{
|
||||
if (span_)
|
||||
span_->End();
|
||||
}
|
||||
|
||||
/** @return A mutable reference to the underlying span. */
|
||||
opentelemetry::trace::Span&
|
||||
span()
|
||||
{
|
||||
return *span_;
|
||||
}
|
||||
|
||||
/** @return A const reference to the underlying span. */
|
||||
opentelemetry::trace::Span const&
|
||||
span() const
|
||||
{
|
||||
return *span_;
|
||||
}
|
||||
|
||||
/** Mark the span status as OK. */
|
||||
void
|
||||
setOk()
|
||||
{
|
||||
span_->SetStatus(opentelemetry::trace::StatusCode::kOk);
|
||||
}
|
||||
|
||||
/** Set an explicit status code on the span.
|
||||
|
||||
@param code The OTel status code.
|
||||
@param description Optional human-readable status description.
|
||||
*/
|
||||
void
|
||||
setStatus(opentelemetry::trace::StatusCode code, std::string_view description = "")
|
||||
{
|
||||
span_->SetStatus(code, std::string(description));
|
||||
}
|
||||
|
||||
/** Set a key-value attribute on the span.
|
||||
|
||||
@param key Attribute name (e.g. "xrpl.rpc.command").
|
||||
@param value Attribute value (string, int, bool, etc.).
|
||||
*/
|
||||
template <typename T>
|
||||
void
|
||||
setAttribute(std::string_view key, T&& value)
|
||||
{
|
||||
span_->SetAttribute(
|
||||
opentelemetry::nostd::string_view(key.data(), key.size()), std::forward<T>(value));
|
||||
}
|
||||
|
||||
/** Add a named event to the span's timeline.
|
||||
|
||||
@param name Event name.
|
||||
*/
|
||||
void
|
||||
addEvent(std::string_view name)
|
||||
{
|
||||
span_->AddEvent(std::string(name));
|
||||
}
|
||||
|
||||
/** Record an exception as a span event following OTel semantic
|
||||
conventions, and mark the span status as error.
|
||||
|
||||
@param e The exception to record.
|
||||
*/
|
||||
void
|
||||
recordException(std::exception const& e)
|
||||
{
|
||||
span_->AddEvent(
|
||||
"exception",
|
||||
{{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}});
|
||||
span_->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
|
||||
}
|
||||
|
||||
/** Return the current OTel context.
|
||||
|
||||
Useful for creating child spans on a different thread by passing
|
||||
this context to Telemetry::startSpan(name, parentContext).
|
||||
*/
|
||||
opentelemetry::context::Context
|
||||
context() const
|
||||
{
|
||||
return opentelemetry::context::RuntimeContext::GetCurrent();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
209
include/xrpl/telemetry/Telemetry.h
Normal file
209
include/xrpl/telemetry/Telemetry.h
Normal file
@@ -0,0 +1,209 @@
|
||||
#pragma once
|
||||
|
||||
/** Abstract interface for OpenTelemetry distributed tracing.
|
||||
|
||||
Provides the Telemetry base class that all components use to create trace
|
||||
spans. Two implementations exist:
|
||||
|
||||
- TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled
|
||||
only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime.
|
||||
- NullTelemetry (NullTelemetry.cpp): no-op stub used when telemetry is
|
||||
disabled at compile time or runtime.
|
||||
|
||||
The Setup struct holds all configuration parsed from the [telemetry]
|
||||
section of xrpld.cfg. See TelemetryConfig.cpp for the parser and
|
||||
cfg/xrpld-example.cfg for the available options.
|
||||
|
||||
OTel SDK headers are conditionally included behind XRPL_ENABLE_TELEMETRY
|
||||
so that builds without telemetry have zero dependency on opentelemetry-cpp.
|
||||
*/
|
||||
|
||||
#include <xrpl/basics/BasicConfig.h>
|
||||
#include <xrpl/beast/utility/Journal.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
#include <opentelemetry/context/context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
#include <opentelemetry/trace/tracer.h>
|
||||
#endif
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
class Telemetry
|
||||
{
|
||||
public:
|
||||
/** Configuration parsed from the [telemetry] section of xrpld.cfg.
|
||||
|
||||
All fields have sensible defaults so the section can be minimal
|
||||
or omitted entirely. See TelemetryConfig.cpp for the parser.
|
||||
*/
|
||||
struct Setup
|
||||
{
|
||||
/** Master switch: true to enable tracing at runtime. */
|
||||
bool enabled = false;
|
||||
|
||||
/** OTel resource attribute `service.name`. */
|
||||
std::string serviceName = "rippled";
|
||||
|
||||
/** OTel resource attribute `service.version` (set from BuildInfo). */
|
||||
std::string serviceVersion;
|
||||
|
||||
/** OTel resource attribute `service.instance.id` (defaults to node
|
||||
public key). */
|
||||
std::string serviceInstanceId;
|
||||
|
||||
/** Exporter type: currently only "otlp_http" is supported. */
|
||||
std::string exporterType = "otlp_http";
|
||||
|
||||
/** OTLP/HTTP endpoint URL where spans are sent. */
|
||||
std::string exporterEndpoint = "http://localhost:4318/v1/traces";
|
||||
|
||||
/** Whether to use TLS for the exporter connection. */
|
||||
bool useTls = false;
|
||||
|
||||
/** Path to a CA certificate bundle for TLS verification. */
|
||||
std::string tlsCertPath;
|
||||
|
||||
/** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything. */
|
||||
double samplingRatio = 1.0;
|
||||
|
||||
/** Maximum number of spans per batch export. */
|
||||
std::uint32_t batchSize = 512;
|
||||
|
||||
/** Delay between batch exports. */
|
||||
std::chrono::milliseconds batchDelay{5000};
|
||||
|
||||
/** Maximum number of spans queued before dropping. */
|
||||
std::uint32_t maxQueueSize = 2048;
|
||||
|
||||
/** Network identifier, added as an OTel resource attribute. */
|
||||
std::uint32_t networkId = 0;
|
||||
|
||||
/** Network type label (e.g. "mainnet", "testnet", "devnet"). */
|
||||
std::string networkType = "mainnet";
|
||||
|
||||
/** Enable tracing for transaction processing. */
|
||||
bool traceTransactions = true;
|
||||
|
||||
/** Enable tracing for consensus rounds. */
|
||||
bool traceConsensus = true;
|
||||
|
||||
/** Enable tracing for RPC request handling. */
|
||||
bool traceRpc = true;
|
||||
|
||||
/** Enable tracing for peer-to-peer messages (disabled by default
|
||||
due to high volume). */
|
||||
bool tracePeer = false;
|
||||
|
||||
/** Enable tracing for ledger close/accept. */
|
||||
bool traceLedger = true;
|
||||
};
|
||||
|
||||
virtual ~Telemetry() = default;
|
||||
|
||||
/** Initialize the tracing pipeline (exporter, processor, provider).
|
||||
Call after construction.
|
||||
*/
|
||||
virtual void
|
||||
start() = 0;
|
||||
|
||||
/** Flush pending spans and shut down the tracing pipeline.
|
||||
Call before destruction.
|
||||
*/
|
||||
virtual void
|
||||
stop() = 0;
|
||||
|
||||
/** @return true if this instance is actively exporting spans. */
|
||||
virtual bool
|
||||
isEnabled() const = 0;
|
||||
|
||||
/** @return true if transaction processing should be traced. */
|
||||
virtual bool
|
||||
shouldTraceTransactions() const = 0;
|
||||
|
||||
/** @return true if consensus rounds should be traced. */
|
||||
virtual bool
|
||||
shouldTraceConsensus() const = 0;
|
||||
|
||||
/** @return true if RPC request handling should be traced. */
|
||||
virtual bool
|
||||
shouldTraceRpc() const = 0;
|
||||
|
||||
/** @return true if peer-to-peer messages should be traced. */
|
||||
virtual bool
|
||||
shouldTracePeer() const = 0;
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
/** Get or create a named tracer instance.
|
||||
|
||||
@param name Tracer name used to identify the instrumentation library.
|
||||
@return A shared pointer to the Tracer.
|
||||
*/
|
||||
virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>
|
||||
getTracer(std::string_view name = "rippled") = 0;
|
||||
|
||||
/** Start a new span on the current thread's context.
|
||||
|
||||
The span becomes a child of the current active span (if any) via
|
||||
OpenTelemetry's context propagation.
|
||||
|
||||
@param name Span name (typically "rpc.command.<cmd>").
|
||||
@param kind The span kind (defaults to kInternal).
|
||||
@return A shared pointer to the new Span.
|
||||
*/
|
||||
virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(
|
||||
std::string_view name,
|
||||
opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
|
||||
|
||||
/** Start a new span with an explicit parent context.
|
||||
|
||||
Use this overload when the parent span is not on the current
|
||||
thread's context stack (e.g. cross-thread trace propagation).
|
||||
|
||||
@param name Span name.
|
||||
@param parentContext The parent span's context.
|
||||
@param kind The span kind (defaults to kInternal).
|
||||
@return A shared pointer to the new Span.
|
||||
*/
|
||||
virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(
|
||||
std::string_view name,
|
||||
opentelemetry::context::Context const& parentContext,
|
||||
opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
/** Create a Telemetry instance.
|
||||
|
||||
Returns a TelemetryImpl when setup.enabled is true, or a
|
||||
NullTelemetry no-op stub otherwise.
|
||||
|
||||
@param setup Configuration from the [telemetry] config section.
|
||||
@param journal Journal for log output during initialization.
|
||||
*/
|
||||
std::unique_ptr<Telemetry>
|
||||
make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal);
|
||||
|
||||
/** Parse the [telemetry] config section into a Setup struct.
|
||||
|
||||
@param section The [telemetry] config section.
|
||||
@param nodePublicKey Node public key, used as default instance ID.
|
||||
@param version Build version string.
|
||||
@return A populated Setup struct with defaults for missing values.
|
||||
*/
|
||||
Telemetry::Setup
|
||||
setup_Telemetry(
|
||||
Section const& section,
|
||||
std::string const& nodePublicKey,
|
||||
std::string const& version);
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
121
src/libxrpl/telemetry/NullTelemetry.cpp
Normal file
121
src/libxrpl/telemetry/NullTelemetry.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
/** No-op implementation of the Telemetry interface.
|
||||
|
||||
Always compiled (regardless of XRPL_ENABLE_TELEMETRY). Provides the
|
||||
make_Telemetry() factory when telemetry is compiled out (#ifndef), which
|
||||
unconditionally returns a NullTelemetry that does nothing.
|
||||
|
||||
When XRPL_ENABLE_TELEMETRY IS defined, the OTel virtual methods
|
||||
(getTracer, startSpan) return noop tracers/spans. The make_Telemetry()
|
||||
factory in this file is not used in that case -- Telemetry.cpp provides
|
||||
its own factory that can return the real TelemetryImpl.
|
||||
*/
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
#include <opentelemetry/trace/noop.h>
|
||||
#endif
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
namespace {
|
||||
|
||||
/** No-op Telemetry that returns immediately from every method.
|
||||
|
||||
Used as the sole implementation when XRPL_ENABLE_TELEMETRY is not
|
||||
defined, or as a fallback when it is defined but enabled=0.
|
||||
*/
|
||||
class NullTelemetry : public Telemetry
|
||||
{
|
||||
/** Retained configuration (unused, kept for diagnostic access). */
|
||||
Setup const setup_;
|
||||
|
||||
public:
|
||||
explicit NullTelemetry(Setup const& setup) : setup_(setup)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
start() override
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
stop() override
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
isEnabled() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceTransactions() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceConsensus() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceRpc() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTracePeer() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>
|
||||
getTracer(std::string_view) override
|
||||
{
|
||||
static auto noopTracer = opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>(
|
||||
new opentelemetry::trace::NoopTracer());
|
||||
return noopTracer;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(std::string_view, opentelemetry::trace::SpanKind) override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>(
|
||||
new opentelemetry::trace::NoopSpan(nullptr));
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(
|
||||
std::string_view,
|
||||
opentelemetry::context::Context const&,
|
||||
opentelemetry::trace::SpanKind) override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>(
|
||||
new opentelemetry::trace::NoopSpan(nullptr));
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/** Factory used when XRPL_ENABLE_TELEMETRY is not defined.
|
||||
Unconditionally returns a NullTelemetry instance.
|
||||
*/
|
||||
#ifndef XRPL_ENABLE_TELEMETRY
|
||||
std::unique_ptr<Telemetry>
|
||||
make_Telemetry(Telemetry::Setup const& setup, beast::Journal)
|
||||
{
|
||||
return std::make_unique<NullTelemetry>(setup);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
279
src/libxrpl/telemetry/Telemetry.cpp
Normal file
279
src/libxrpl/telemetry/Telemetry.cpp
Normal file
@@ -0,0 +1,279 @@
|
||||
/** OpenTelemetry SDK implementation of the Telemetry interface.
|
||||
|
||||
Compiled only when XRPL_ENABLE_TELEMETRY is defined (via CMake
|
||||
telemetry=ON). Contains:
|
||||
|
||||
- TelemetryImpl: configures the OTel SDK with an OTLP/HTTP exporter,
|
||||
batch span processor, trace-ID-ratio sampler, and resource attributes.
|
||||
- NullTelemetryOtel: no-op fallback used when telemetry is compiled in
|
||||
but disabled at runtime (enabled=0 in config).
|
||||
- make_Telemetry(): factory that selects the appropriate implementation.
|
||||
*/
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#include <opentelemetry/exporters/otlp/otlp_http_exporter_factory.h>
|
||||
#include <opentelemetry/exporters/otlp/otlp_http_exporter_options.h>
|
||||
#include <opentelemetry/sdk/resource/semantic_conventions.h>
|
||||
#include <opentelemetry/sdk/trace/batch_span_processor_factory.h>
|
||||
#include <opentelemetry/sdk/trace/batch_span_processor_options.h>
|
||||
#include <opentelemetry/sdk/trace/sampler.h>
|
||||
#include <opentelemetry/sdk/trace/samplers/trace_id_ratio.h>
|
||||
#include <opentelemetry/sdk/trace/tracer_provider.h>
|
||||
#include <opentelemetry/sdk/trace/tracer_provider_factory.h>
|
||||
#include <opentelemetry/trace/noop.h>
|
||||
#include <opentelemetry/trace/provider.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace trace_api = opentelemetry::trace;
|
||||
namespace trace_sdk = opentelemetry::sdk::trace;
|
||||
namespace otlp_http = opentelemetry::exporter::otlp;
|
||||
namespace resource = opentelemetry::sdk::resource;
|
||||
|
||||
/** No-op implementation used when XRPL_ENABLE_TELEMETRY is defined but
|
||||
setup.enabled is false at runtime.
|
||||
|
||||
Lives in the anonymous namespace so there is no ODR conflict with the
|
||||
NullTelemetry in NullTelemetry.cpp.
|
||||
*/
|
||||
class NullTelemetryOtel : public Telemetry
|
||||
{
|
||||
/** Retained configuration (unused, kept for diagnostic access). */
|
||||
Setup const setup_;
|
||||
|
||||
public:
|
||||
explicit NullTelemetryOtel(Setup const& setup) : setup_(setup)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
start() override
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
stop() override
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
isEnabled() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceTransactions() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceConsensus() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceRpc() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTracePeer() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>
|
||||
getTracer(std::string_view) override
|
||||
{
|
||||
static auto noopTracer =
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>(new trace_api::NoopTracer());
|
||||
return noopTracer;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(std::string_view, trace_api::SpanKind) override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<trace_api::Span>(new trace_api::NoopSpan(nullptr));
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(std::string_view, opentelemetry::context::Context const&, trace_api::SpanKind)
|
||||
override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<trace_api::Span>(new trace_api::NoopSpan(nullptr));
|
||||
}
|
||||
};
|
||||
|
||||
/** Full OTel SDK implementation that exports trace spans via OTLP/HTTP.
|
||||
|
||||
Configures an OTLP/HTTP exporter, batch span processor,
|
||||
TraceIdRatioBasedSampler, and resource attributes on start().
|
||||
*/
|
||||
class TelemetryImpl : public Telemetry
|
||||
{
|
||||
/** Configuration from the [telemetry] config section. */
|
||||
Setup const setup_;
|
||||
|
||||
/** Journal used for log output during start/stop. */
|
||||
beast::Journal const journal_;
|
||||
|
||||
/** The SDK TracerProvider that owns the export pipeline.
|
||||
|
||||
Held as std::shared_ptr so we can call ForceFlush() on shutdown.
|
||||
Wrapped in a nostd::shared_ptr when registered as the global provider.
|
||||
*/
|
||||
std::shared_ptr<trace_sdk::TracerProvider> sdkProvider_;
|
||||
|
||||
public:
|
||||
TelemetryImpl(Setup const& setup, beast::Journal journal) : setup_(setup), journal_(journal)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
start() override
|
||||
{
|
||||
JLOG(journal_.info()) << "Telemetry starting: endpoint=" << setup_.exporterEndpoint
|
||||
<< " sampling=" << setup_.samplingRatio;
|
||||
|
||||
// Configure OTLP HTTP exporter
|
||||
otlp_http::OtlpHttpExporterOptions exporterOpts;
|
||||
exporterOpts.url = setup_.exporterEndpoint;
|
||||
if (setup_.useTls)
|
||||
exporterOpts.ssl_ca_cert_path = setup_.tlsCertPath;
|
||||
|
||||
auto exporter = otlp_http::OtlpHttpExporterFactory::Create(exporterOpts);
|
||||
|
||||
// Configure batch processor
|
||||
trace_sdk::BatchSpanProcessorOptions processorOpts;
|
||||
processorOpts.max_queue_size = setup_.maxQueueSize;
|
||||
processorOpts.schedule_delay_millis = std::chrono::milliseconds(setup_.batchDelay);
|
||||
processorOpts.max_export_batch_size = setup_.batchSize;
|
||||
|
||||
auto processor =
|
||||
trace_sdk::BatchSpanProcessorFactory::Create(std::move(exporter), processorOpts);
|
||||
|
||||
// Configure resource attributes
|
||||
auto resourceAttrs = resource::Resource::Create({
|
||||
{resource::SemanticConventions::kServiceName, setup_.serviceName},
|
||||
{resource::SemanticConventions::kServiceVersion, setup_.serviceVersion},
|
||||
{resource::SemanticConventions::kServiceInstanceId, setup_.serviceInstanceId},
|
||||
{"xrpl.network.id", static_cast<int64_t>(setup_.networkId)},
|
||||
{"xrpl.network.type", setup_.networkType},
|
||||
});
|
||||
|
||||
// Configure sampler
|
||||
auto sampler = std::make_unique<trace_sdk::TraceIdRatioBasedSampler>(setup_.samplingRatio);
|
||||
|
||||
// Create TracerProvider
|
||||
sdkProvider_ = trace_sdk::TracerProviderFactory::Create(
|
||||
std::move(processor), resourceAttrs, std::move(sampler));
|
||||
|
||||
// Set as global provider
|
||||
trace_api::Provider::SetTracerProvider(
|
||||
opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(sdkProvider_));
|
||||
|
||||
JLOG(journal_.info()) << "Telemetry started successfully";
|
||||
}
|
||||
|
||||
void
|
||||
stop() override
|
||||
{
|
||||
JLOG(journal_.info()) << "Telemetry stopping";
|
||||
if (sdkProvider_)
|
||||
{
|
||||
// Force flush before shutdown
|
||||
sdkProvider_->ForceFlush();
|
||||
sdkProvider_.reset();
|
||||
trace_api::Provider::SetTracerProvider(
|
||||
opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(
|
||||
new trace_api::NoopTracerProvider()));
|
||||
}
|
||||
JLOG(journal_.info()) << "Telemetry stopped";
|
||||
}
|
||||
|
||||
bool
|
||||
isEnabled() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceTransactions() const override
|
||||
{
|
||||
return setup_.traceTransactions;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceConsensus() const override
|
||||
{
|
||||
return setup_.traceConsensus;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceRpc() const override
|
||||
{
|
||||
return setup_.traceRpc;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTracePeer() const override
|
||||
{
|
||||
return setup_.tracePeer;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>
|
||||
getTracer(std::string_view name) override
|
||||
{
|
||||
if (!sdkProvider_)
|
||||
return trace_api::Provider::GetTracerProvider()->GetTracer(std::string(name));
|
||||
return sdkProvider_->GetTracer(std::string(name));
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(std::string_view name, trace_api::SpanKind kind) override
|
||||
{
|
||||
auto tracer = getTracer("rippled");
|
||||
trace_api::StartSpanOptions opts;
|
||||
opts.kind = kind;
|
||||
return tracer->StartSpan(std::string(name), opts);
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(
|
||||
std::string_view name,
|
||||
opentelemetry::context::Context const& parentContext,
|
||||
trace_api::SpanKind kind) override
|
||||
{
|
||||
auto tracer = getTracer("rippled");
|
||||
trace_api::StartSpanOptions opts;
|
||||
opts.kind = kind;
|
||||
opts.parent = parentContext;
|
||||
return tracer->StartSpan(std::string(name), opts);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<Telemetry>
|
||||
make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal)
|
||||
{
|
||||
if (setup.enabled)
|
||||
return std::make_unique<TelemetryImpl>(setup, journal);
|
||||
return std::make_unique<NullTelemetryOtel>(setup);
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
52
src/libxrpl/telemetry/TelemetryConfig.cpp
Normal file
52
src/libxrpl/telemetry/TelemetryConfig.cpp
Normal file
@@ -0,0 +1,52 @@
|
||||
/** Parser for the [telemetry] section of xrpld.cfg.
|
||||
|
||||
Reads configuration values from the config file and populates a
|
||||
Telemetry::Setup struct. All options have sensible defaults so the
|
||||
section can be minimal or omitted entirely.
|
||||
|
||||
See cfg/xrpld-example.cfg for the full list of available options.
|
||||
*/
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
Telemetry::Setup
|
||||
setup_Telemetry(
|
||||
Section const& section,
|
||||
std::string const& nodePublicKey,
|
||||
std::string const& version)
|
||||
{
|
||||
Telemetry::Setup setup;
|
||||
|
||||
setup.enabled = section.value_or<int>("enabled", 0) != 0;
|
||||
setup.serviceName = section.value_or<std::string>("service_name", "rippled");
|
||||
setup.serviceVersion = version;
|
||||
setup.serviceInstanceId = section.value_or<std::string>("service_instance_id", nodePublicKey);
|
||||
|
||||
setup.exporterType = section.value_or<std::string>("exporter", "otlp_http");
|
||||
setup.exporterEndpoint =
|
||||
section.value_or<std::string>("endpoint", "http://localhost:4318/v1/traces");
|
||||
|
||||
setup.useTls = section.value_or<int>("use_tls", 0) != 0;
|
||||
setup.tlsCertPath = section.value_or<std::string>("tls_ca_cert", "");
|
||||
|
||||
setup.samplingRatio = section.value_or<double>("sampling_ratio", 1.0);
|
||||
|
||||
setup.batchSize = section.value_or<std::uint32_t>("batch_size", 512u);
|
||||
setup.batchDelay =
|
||||
std::chrono::milliseconds{section.value_or<std::uint32_t>("batch_delay_ms", 5000u)};
|
||||
setup.maxQueueSize = section.value_or<std::uint32_t>("max_queue_size", 2048u);
|
||||
|
||||
setup.traceTransactions = section.value_or<int>("trace_transactions", 1) != 0;
|
||||
setup.traceConsensus = section.value_or<int>("trace_consensus", 1) != 0;
|
||||
setup.traceRpc = section.value_or<int>("trace_rpc", 1) != 0;
|
||||
setup.tracePeer = section.value_or<int>("trace_peer", 0) != 0;
|
||||
setup.traceLedger = section.value_or<int>("trace_ledger", 1) != 0;
|
||||
|
||||
return setup;
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
@@ -31,7 +31,6 @@
|
||||
#include <xrpld/shamap/NodeFamily.h>
|
||||
|
||||
#include <xrpl/basics/ByteUtilities.h>
|
||||
#include <xrpl/basics/MallocTrim.h>
|
||||
#include <xrpl/basics/ResolverAsio.h>
|
||||
#include <xrpl/basics/random.h>
|
||||
#include <xrpl/beast/asio/io_latency_probe.h>
|
||||
@@ -52,6 +51,7 @@
|
||||
#include <xrpl/resource/Fees.h>
|
||||
#include <xrpl/server/LoadFeeTrack.h>
|
||||
#include <xrpl/server/Wallet.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
#include <xrpl/tx/apply.h>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
@@ -147,6 +147,7 @@ public:
|
||||
|
||||
beast::Journal m_journal;
|
||||
std::unique_ptr<perf::PerfLog> perfLog_;
|
||||
std::unique_ptr<telemetry::Telemetry> telemetry_;
|
||||
Application::MutexType m_masterMutex;
|
||||
|
||||
// Required by the SHAMapStore
|
||||
@@ -258,6 +259,14 @@ public:
|
||||
logs_->journal("PerfLog"),
|
||||
[this] { signalStop("PerfLog"); }))
|
||||
|
||||
, telemetry_(
|
||||
telemetry::make_Telemetry(
|
||||
telemetry::setup_Telemetry(
|
||||
config_->section("telemetry"),
|
||||
"", // nodePublicKey not yet available at this point
|
||||
BuildInfo::getVersionString()),
|
||||
logs_->journal("Telemetry")))
|
||||
|
||||
, m_txMaster(*this)
|
||||
|
||||
, m_collectorManager(
|
||||
@@ -618,6 +627,12 @@ public:
|
||||
return *perfLog_;
|
||||
}
|
||||
|
||||
telemetry::Telemetry&
|
||||
getTelemetry() override
|
||||
{
|
||||
return *telemetry_;
|
||||
}
|
||||
|
||||
NodeCache&
|
||||
getTempNodeCache() override
|
||||
{
|
||||
@@ -1054,8 +1069,6 @@ public:
|
||||
<< "; size after: " << cachedSLEs_.size();
|
||||
}
|
||||
|
||||
mallocTrim("doSweep", m_journal);
|
||||
|
||||
// Set timer to do another sweep later.
|
||||
setSweepTimer();
|
||||
}
|
||||
@@ -1472,6 +1485,7 @@ ApplicationImp::start(bool withTimers)
|
||||
|
||||
ledgerCleaner_->start();
|
||||
perfLog_->start();
|
||||
telemetry_->start();
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1562,6 +1576,7 @@ ApplicationImp::run()
|
||||
ledgerCleaner_->stop();
|
||||
m_nodeStore->stop();
|
||||
perfLog_->stop();
|
||||
telemetry_->stop();
|
||||
|
||||
JLOG(m_journal.info()) << "Done.";
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user