From 37331f39b5779b7fcd2ddaddb1c7ee062c1af8fb Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Mon, 23 Feb 2026 17:26:36 +0000 Subject: [PATCH] added comments and docs Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> --- CMakeLists.txt | 5 + cmake/XrplCore.cmake | 6 +- conanfile.py | 2 + docker/telemetry/docker-compose.yml | 16 + .../provisioning/datasources/jaeger.yaml | 5 + docker/telemetry/otel-collector-config.yaml | 6 + docs/build/telemetry.md | 277 ++++++++++++++++++ include/xrpl/telemetry/SpanGuard.h | 55 ++++ include/xrpl/telemetry/Telemetry.h | 112 +++++++ src/libxrpl/telemetry/NullTelemetry.cpp | 25 +- src/libxrpl/telemetry/Telemetry.cpp | 43 ++- src/libxrpl/telemetry/TelemetryConfig.cpp | 9 + src/xrpld/telemetry/TracingInstrumentation.h | 55 +++- 13 files changed, 598 insertions(+), 18 deletions(-) create mode 100644 docs/build/telemetry.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 953896012b..8bb5eac71c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,6 +126,11 @@ if (rocksdb) target_link_libraries(xrpl_libs INTERFACE RocksDB::rocksdb) endif () +# OpenTelemetry distributed tracing (optional). +# When ON, links against opentelemetry-cpp and defines XRPL_ENABLE_TELEMETRY +# so that tracing macros in TracingInstrumentation.h are compiled in. +# When OFF (default), all tracing code compiles to no-ops with zero overhead. +# Enable via: conan install -o telemetry=True, or cmake -Dtelemetry=ON. 
option(telemetry "Enable OpenTelemetry tracing" OFF) if (telemetry) find_package(opentelemetry-cpp CONFIG REQUIRED) diff --git a/cmake/XrplCore.cmake b/cmake/XrplCore.cmake index 9854ea8cb5..d113982309 100644 --- a/cmake/XrplCore.cmake +++ b/cmake/XrplCore.cmake @@ -119,7 +119,11 @@ target_link_libraries( add_module(xrpl tx) target_link_libraries(xrpl.libxrpl.tx PUBLIC xrpl.libxrpl.ledger) -# Telemetry module +# Telemetry module — OpenTelemetry distributed tracing support. +# Sources: include/xrpl/telemetry/ (headers), src/libxrpl/telemetry/ (impl). +# When telemetry=ON, links the Conan-provided umbrella target +# opentelemetry-cpp::opentelemetry-cpp (individual component targets like +# ::api, ::sdk are not available in the Conan package). add_module(xrpl telemetry) target_link_libraries(xrpl.libxrpl.telemetry PUBLIC xrpl.libxrpl.basics xrpl.libxrpl.beast) if (telemetry) diff --git a/conanfile.py b/conanfile.py index ef4b883a49..2e92952b5f 100644 --- a/conanfile.py +++ b/conanfile.py @@ -142,6 +142,8 @@ class Xrpl(ConanFile): self.requires("jemalloc/5.3.0") if self.options.rocksdb: self.requires("rocksdb/10.5.1") + # OpenTelemetry C++ SDK for distributed tracing (optional). + # Provides OTLP/HTTP exporter, batch span processor, and trace API. if self.options.telemetry: self.requires("opentelemetry-cpp/1.18.0") self.requires("xxhash/0.8.3", **transitive_headers_opt) diff --git a/docker/telemetry/docker-compose.yml b/docker/telemetry/docker-compose.yml index ccfab652c4..9b31aaa536 100644 --- a/docker/telemetry/docker-compose.yml +++ b/docker/telemetry/docker-compose.yml @@ -1,3 +1,19 @@ +# Docker Compose stack for rippled OpenTelemetry observability. +# +# Provides three services for local development: +# - otel-collector: receives OTLP traces from rippled, batches and +# forwards them to Jaeger. Listens on ports 4317 (gRPC) and 4318 (HTTP). +# - jaeger: all-in-one tracing backend with UI on port 16686. 
+# - grafana: dashboards on port 3000, pre-configured with Jaeger datasource. +# +# Usage: +# docker compose -f docker/telemetry/docker-compose.yml up -d +# +# Configure rippled to export traces by adding to xrpld.cfg: +# [telemetry] +# enabled=1 +# endpoint=http://localhost:4318/v1/traces + version: "3.8" services: diff --git a/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml b/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml index ca3a4319bd..e410cb854b 100644 --- a/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml +++ b/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml @@ -1,3 +1,8 @@ +# Grafana datasource provisioning for the rippled telemetry stack. +# Auto-configures Jaeger as a trace data source on Grafana startup. +# Access Grafana at http://localhost:3000, then use Explore -> Jaeger +# to browse rippled traces. + apiVersion: 1 datasources: diff --git a/docker/telemetry/otel-collector-config.yaml b/docker/telemetry/otel-collector-config.yaml index 4d27ecb913..0231023969 100644 --- a/docker/telemetry/otel-collector-config.yaml +++ b/docker/telemetry/otel-collector-config.yaml @@ -1,3 +1,9 @@ +# OpenTelemetry Collector configuration for rippled development. +# +# Pipeline: OTLP receiver -> batch processor -> debug exporter + Jaeger. +# rippled sends traces via OTLP/HTTP to port 4318. The collector batches +# them and forwards to Jaeger via OTLP/gRPC on the Docker network. + receivers: otlp: protocols: diff --git a/docs/build/telemetry.md b/docs/build/telemetry.md new file mode 100644 index 0000000000..2b08791d87 --- /dev/null +++ b/docs/build/telemetry.md @@ -0,0 +1,277 @@ +# OpenTelemetry Tracing for Rippled + +This document explains how to build rippled with OpenTelemetry distributed tracing support, configure the runtime telemetry options, and set up the observability backend to view traces. 
+ +- [OpenTelemetry Tracing for Rippled](#opentelemetry-tracing-for-rippled) + - [Overview](#overview) + - [Building with Telemetry](#building-with-telemetry) + - [Summary](#summary) + - [Build steps](#build-steps) + - [Install dependencies](#install-dependencies) + - [Call CMake](#call-cmake) + - [Build](#build) + - [Building without telemetry](#building-without-telemetry) + - [Runtime Configuration](#runtime-configuration) + - [Configuration options](#configuration-options) + - [Observability Stack](#observability-stack) + - [Start the stack](#start-the-stack) + - [Verify the stack](#verify-the-stack) + - [View traces in Jaeger](#view-traces-in-jaeger) + - [Running Tests](#running-tests) + - [Troubleshooting](#troubleshooting) + - [No traces appear in Jaeger](#no-traces-appear-in-jaeger) + - [Conan lockfile error](#conan-lockfile-error) + - [CMake target not found](#cmake-target-not-found) + - [Architecture](#architecture) + - [Key files](#key-files) + - [Conditional compilation](#conditional-compilation) + +## Overview + +Rippled supports optional [OpenTelemetry](https://opentelemetry.io/) distributed tracing. +When enabled, it instruments RPC requests with trace spans that are exported via +OTLP/HTTP to an OpenTelemetry Collector, which forwards them to a tracing backend +such as Jaeger. + +Telemetry is **off by default** at both compile time and runtime: + +- **Compile time**: The Conan option `telemetry` and CMake option `telemetry` must be set to `True`/`ON`. + When disabled, all tracing macros compile to `((void)0)` with zero overhead. +- **Runtime**: The `[telemetry]` config section must set `enabled=1`. + When disabled at runtime, a no-op implementation is used. + +## Building with Telemetry + +### Summary + +Follow the same instructions as mentioned in [BUILD.md](../../BUILD.md) but with the following changes: + +1. Pass `-o telemetry=True` to `conan install` to pull the `opentelemetry-cpp` dependency. +2. 
CMake will automatically pick up `telemetry=ON` from the Conan-generated toolchain. +3. Build as usual. + +--- + +### Build steps + +```bash +cd /path/to/rippled +rm -rf .build +mkdir .build +cd .build +``` + +#### Install dependencies + +The `telemetry` option adds `opentelemetry-cpp/1.18.0` as a dependency. +If the Conan lockfile does not yet include this package, bypass it with `--lockfile=""`. + +```bash +conan install .. \ + --output-folder . \ + --build missing \ + --settings build_type=Debug \ + -o telemetry=True \ + -o tests=True \ + -o xrpld=True \ + --lockfile="" +``` + +> **Note**: The first build with telemetry may take longer as `opentelemetry-cpp` +> and its transitive dependencies are compiled from source. + +#### Call CMake + +The Conan-generated toolchain file sets `telemetry=ON` automatically. +No additional CMake flags are needed beyond the standard ones. + +```bash +cmake .. -G Ninja \ + -DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \ + -DCMAKE_BUILD_TYPE=Debug \ + -Dtests=ON -Dxrpld=ON +``` + +You should see in the CMake output: + +``` +-- OpenTelemetry tracing enabled +``` + +#### Build + +```bash +cmake --build . --parallel $(nproc) +``` + +### Building without telemetry + +Omit the `-o telemetry=True` option (or pass `-o telemetry=False`). +The `opentelemetry-cpp` dependency will not be downloaded, +the `XRPL_ENABLE_TELEMETRY` preprocessor define will not be set, +and all tracing macros will compile to no-ops. +The resulting binary contains no OpenTelemetry code; only the no-op `NullTelemetry` stub is compiled in. 
+ +## Runtime Configuration + +Add a `[telemetry]` section to your `xrpld.cfg` file: + +```ini +[telemetry] +enabled=1 +service_name=rippled +endpoint=http://localhost:4318/v1/traces +sampling_ratio=1.0 +trace_rpc=1 +trace_transactions=1 +trace_consensus=1 +trace_peer=0 +``` + +### Configuration options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | int | `0` | Enable (`1`) or disable (`0`) telemetry at runtime | +| `service_name` | string | `rippled` | Service name reported in traces | +| `service_instance_id` | string | node public key | Unique instance identifier | +| `exporter` | string | `otlp_http` | Exporter type | +| `endpoint` | string | `http://localhost:4318/v1/traces` | OTLP/HTTP collector endpoint | +| `use_tls` | int | `0` | Enable TLS for the exporter connection | +| `tls_ca_cert` | string | (empty) | Path to CA certificate for TLS | +| `sampling_ratio` | double | `1.0` | Fraction of traces to sample (`0.0` to `1.0`) | +| `batch_size` | uint32 | `512` | Maximum spans per export batch | +| `batch_delay_ms` | uint32 | `5000` | Maximum delay (ms) before flushing a batch | +| `max_queue_size` | uint32 | `2048` | Maximum spans queued in memory | +| `trace_rpc` | int | `1` | Enable RPC request tracing | +| `trace_transactions` | int | `1` | Enable transaction lifecycle tracing | +| `trace_consensus` | int | `1` | Enable consensus round tracing | +| `trace_peer` | int | `0` | Enable peer message tracing (high volume) | +| `trace_ledger` | int | `1` | Enable ledger close tracing | + +## Observability Stack + +A Docker Compose stack is provided in `docker/telemetry/` with three services: + +| Service | Port | Purpose | +|---------|------|---------| +| **OTel Collector** | `4317` (gRPC), `4318` (HTTP), `13133` (health) | Receives OTLP spans, batches, and forwards to Jaeger | +| **Jaeger** | `16686` (UI) | Trace storage and visualization | +| **Grafana** | `3000` | Dashboards (Jaeger pre-configured as 
datasource) | + +### Start the stack + +```bash +docker compose -f docker/telemetry/docker-compose.yml up -d +``` + +### Verify the stack + +```bash +# Collector health +curl http://localhost:13133 + +# Jaeger UI +open http://localhost:16686 + +# Grafana +open http://localhost:3000 +``` + +### View traces in Jaeger + +1. Open `http://localhost:16686` in a browser. +2. Select the service name (e.g. `rippled`) from the **Service** dropdown. +3. Click **Find Traces**. +4. Click into any trace to see the span tree and attributes. + +Traced RPC operations produce a span hierarchy like: + +``` +rpc.request + └── rpc.command.server_info (xrpl.rpc.command=server_info, xrpl.rpc.status=success) +``` + +Each span includes attributes: +- `xrpl.rpc.command` — the RPC method name +- `xrpl.rpc.version` — API version +- `xrpl.rpc.role` — `admin` or `user` +- `xrpl.rpc.status` — `success` or `error` + +## Running Tests + +Unit tests run with the telemetry-enabled build regardless of whether the +observability stack is running. When no collector is available, the exporter +silently drops spans with no impact on test results. + +```bash +# Run all RPC tests +./xrpld --unittest=RPCCall,ServerInfo,AccountTx,LedgerRPC,Transaction --unittest-jobs $(nproc) + +# Run the full test suite +./xrpld --unittest --unittest-jobs $(nproc) +``` + +To generate traces during manual testing, start rippled in standalone mode: + +```bash +./xrpld --conf /path/to/xrpld.cfg --standalone --start +``` + +Then send RPC requests: + +```bash +curl -s -X POST http://127.0.0.1:5005/ \ + -H "Content-Type: application/json" \ + -d '{"method":"server_info","params":[{}]}' +``` + +## Troubleshooting + +### No traces appear in Jaeger + +1. Confirm the OTel Collector is running: `docker compose -f docker/telemetry/docker-compose.yml ps` +2. Check collector logs for errors: `docker compose -f docker/telemetry/docker-compose.yml logs otel-collector` +3. Confirm `[telemetry] enabled=1` is set in the rippled config. +4. 
Confirm `endpoint` points to the correct collector address (`http://localhost:4318/v1/traces`). +5. Wait for the batch delay to elapse (default `5000` ms) before checking Jaeger. + +### Conan lockfile error + +If you see `ERROR: Requirement 'opentelemetry-cpp/1.18.0' not in lockfile 'requires'`, +the lockfile was generated without the telemetry dependency. +Pass `--lockfile=""` to bypass the lockfile, or regenerate it with telemetry enabled. + +### CMake target not found + +If CMake reports that `opentelemetry-cpp` targets are not found, +ensure you ran `conan install` with `-o telemetry=True` and that the +Conan-generated toolchain file is being used. +The Conan package provides a single umbrella target +`opentelemetry-cpp::opentelemetry-cpp` (not individual component targets). + +## Architecture + +### Key files + +| File | Purpose | +|------|---------| +| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct | +| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard (activates scope, ends span on destruction) | +| `src/libxrpl/telemetry/Telemetry.cpp` | OTel-backed implementation (`TelemetryImpl`) | +| `src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) | +| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) | +| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) | +| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation | +| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation | +| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Jaeger + Grafana) | +| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration | + +### Conditional compilation + +All OpenTelemetry SDK headers are guarded behind `#ifdef XRPL_ENABLE_TELEMETRY`. +The instrumentation macros in `TracingInstrumentation.h` compile to `((void)0)` when +the define is absent. 
+At runtime, if `enabled=0` is set in config (or the section is omitted), a +`NullTelemetry` implementation is used that returns no-op spans. +This two-layer approach ensures zero overhead when telemetry is not wanted. diff --git a/include/xrpl/telemetry/SpanGuard.h b/include/xrpl/telemetry/SpanGuard.h index 79564881a5..77fc4e74ba 100644 --- a/include/xrpl/telemetry/SpanGuard.h +++ b/include/xrpl/telemetry/SpanGuard.h @@ -1,5 +1,17 @@ #pragma once +/** RAII guard for OpenTelemetry trace spans. + + Wraps an OTel Span and Scope together. On construction, the span is + activated on the current thread's context (via Scope). On destruction, + the span is ended and the previous context is restored. + + Used by the XRPL_TRACE_* macros in TracingInstrumentation.h. Can also + be stored in std::optional for conditional tracing (move-constructible). + + Only compiled when XRPL_ENABLE_TELEMETRY is defined. +*/ + #ifdef XRPL_ENABLE_TELEMETRY #include @@ -13,18 +25,34 @@ namespace xrpl { namespace telemetry { +/** RAII wrapper that activates a span on construction and ends it on + destruction. Non-copyable but move-constructible so it can be held + in std::optional for conditional tracing. +*/ class SpanGuard { + /** The OTel span being guarded. Set to nullptr after move. */ opentelemetry::nostd::shared_ptr span_; + + /** Scope that activates span_ on the current thread's context stack. */ opentelemetry::trace::Scope scope_; public: + /** Construct a guard that activates @p span on the current context. + + @param span The span to guard. Ended in the destructor. + */ explicit SpanGuard( opentelemetry::nostd::shared_ptr span) : span_(std::move(span)), scope_(span_) { } + /** Non-copyable. Move-constructible to support std::optional. + + The move constructor creates a new Scope from the transferred span, + because Scope is not movable. 
+ */ SpanGuard(SpanGuard const&) = delete; SpanGuard& operator=(SpanGuard const&) = delete; SpanGuard(SpanGuard&& other) noexcept @@ -40,24 +68,32 @@ public: span_->End(); } + /** @return A mutable reference to the underlying span. */ opentelemetry::trace::Span& span() { return *span_; } + /** @return A const reference to the underlying span. */ opentelemetry::trace::Span const& span() const { return *span_; } + /** Mark the span status as OK. */ void setOk() { span_->SetStatus(opentelemetry::trace::StatusCode::kOk); } + /** Set an explicit status code on the span. + + @param code The OTel status code. + @param description Optional human-readable status description. + */ void setStatus( opentelemetry::trace::StatusCode code, @@ -66,6 +102,11 @@ public: span_->SetStatus(code, std::string(description)); } + /** Set a key-value attribute on the span. + + @param key Attribute name (e.g. "xrpl.rpc.command"). + @param value Attribute value (string, int, bool, etc.). + */ template void setAttribute(std::string_view key, T&& value) @@ -75,12 +116,21 @@ public: std::forward(value)); } + /** Add a named event to the span's timeline. + + @param name Event name. + */ void addEvent(std::string_view name) { span_->AddEvent(std::string(name)); } + /** Record an exception as a span event following OTel semantic + conventions, and mark the span status as error. + + @param e The exception to record. + */ void recordException(std::exception const& e) { @@ -92,6 +142,11 @@ public: opentelemetry::trace::StatusCode::kError, e.what()); } + /** Return the current OTel context. + + Useful for creating child spans on a different thread by passing + this context to Telemetry::startSpan(name, parentContext). 
+ */ opentelemetry::context::Context context() const { diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index 5710f9a698..ad07cd8a35 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -1,5 +1,23 @@ #pragma once +/** Abstract interface for OpenTelemetry distributed tracing. + + Provides the Telemetry base class that all components use to create trace + spans. Two implementations exist: + + - TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled + only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime. + - NullTelemetry (NullTelemetry.cpp): no-op stub used when telemetry is + disabled at compile time or runtime. + + The Setup struct holds all configuration parsed from the [telemetry] + section of xrpld.cfg. See TelemetryConfig.cpp for the parser and + cfg/xrpld-example.cfg for the available options. + + OTel SDK headers are conditionally included behind XRPL_ENABLE_TELEMETRY + so that builds without telemetry have zero dependency on opentelemetry-cpp. +*/ + #include #include @@ -21,55 +39,134 @@ namespace telemetry { class Telemetry { public: + /** Configuration parsed from the [telemetry] section of xrpld.cfg. + + All fields have sensible defaults so the section can be minimal + or omitted entirely. See TelemetryConfig.cpp for the parser. + */ struct Setup { + /** Master switch: true to enable tracing at runtime. */ bool enabled = false; + + /** OTel resource attribute `service.name`. */ std::string serviceName = "rippled"; + + /** OTel resource attribute `service.version` (set from BuildInfo). */ std::string serviceVersion; + + /** OTel resource attribute `service.instance.id` (defaults to node + public key). */ std::string serviceInstanceId; + /** Exporter type: currently only "otlp_http" is supported. */ std::string exporterType = "otlp_http"; + + /** OTLP/HTTP endpoint URL where spans are sent. 
*/ std::string exporterEndpoint = "http://localhost:4318/v1/traces"; + + /** Whether to use TLS for the exporter connection. */ bool useTls = false; + + /** Path to a CA certificate bundle for TLS verification. */ std::string tlsCertPath; + /** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything. */ double samplingRatio = 1.0; + /** Maximum number of spans per batch export. */ std::uint32_t batchSize = 512; + + /** Delay between batch exports. */ std::chrono::milliseconds batchDelay{5000}; + + /** Maximum number of spans queued before dropping. */ std::uint32_t maxQueueSize = 2048; + /** Network identifier, added as an OTel resource attribute. */ std::uint32_t networkId = 0; + + /** Network type label (e.g. "mainnet", "testnet", "devnet"). */ std::string networkType = "mainnet"; + /** Enable tracing for transaction processing. */ bool traceTransactions = true; + + /** Enable tracing for consensus rounds. */ bool traceConsensus = true; + + /** Enable tracing for RPC request handling. */ bool traceRpc = true; + + /** Enable tracing for peer-to-peer messages (disabled by default + due to high volume). */ bool tracePeer = false; + + /** Enable tracing for ledger close/accept. */ bool traceLedger = true; }; virtual ~Telemetry() = default; + /** Initialize the tracing pipeline (exporter, processor, provider). + Call after construction. + */ virtual void start() = 0; + + /** Flush pending spans and shut down the tracing pipeline. + Call before destruction. + */ virtual void stop() = 0; + + /** @return true if this instance is actively exporting spans. */ virtual bool isEnabled() const = 0; + /** @return true if transaction processing should be traced. */ virtual bool shouldTraceTransactions() const = 0; + + /** @return true if consensus rounds should be traced. */ virtual bool shouldTraceConsensus() const = 0; + + /** @return true if RPC request handling should be traced. 
*/ virtual bool shouldTraceRpc() const = 0; + + /** @return true if peer-to-peer messages should be traced. */ virtual bool shouldTracePeer() const = 0; #ifdef XRPL_ENABLE_TELEMETRY + /** Get or create a named tracer instance. + + @param name Tracer name used to identify the instrumentation library. + @return A shared pointer to the Tracer. + */ virtual opentelemetry::nostd::shared_ptr getTracer(std::string_view name = "rippled") = 0; + /** Start a new span on the current thread's context. + + The span becomes a child of the current active span (if any) via + OpenTelemetry's context propagation. + + @param name Span name (typically "rpc.command."). + @param kind The span kind (defaults to kInternal). + @return A shared pointer to the new Span. + */ virtual opentelemetry::nostd::shared_ptr startSpan( std::string_view name, opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0; + /** Start a new span with an explicit parent context. + + Use this overload when the parent span is not on the current + thread's context stack (e.g. cross-thread trace propagation). + + @param name Span name. + @param parentContext The parent span's context. + @param kind The span kind (defaults to kInternal). + @return A shared pointer to the new Span. + */ virtual opentelemetry::nostd::shared_ptr startSpan( std::string_view name, @@ -79,9 +176,24 @@ public: #endif }; +/** Create a Telemetry instance. + + Returns a TelemetryImpl when setup.enabled is true, or a + NullTelemetry no-op stub otherwise. + + @param setup Configuration from the [telemetry] config section. + @param journal Journal for log output during initialization. +*/ std::unique_ptr make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal); +/** Parse the [telemetry] config section into a Setup struct. + + @param section The [telemetry] config section. + @param nodePublicKey Node public key, used as default instance ID. + @param version Build version string. 
+ @return A populated Setup struct with defaults for missing values. +*/ Telemetry::Setup setup_Telemetry( Section const& section, diff --git a/src/libxrpl/telemetry/NullTelemetry.cpp b/src/libxrpl/telemetry/NullTelemetry.cpp index 5062b9da0f..ae64d3305e 100644 --- a/src/libxrpl/telemetry/NullTelemetry.cpp +++ b/src/libxrpl/telemetry/NullTelemetry.cpp @@ -1,3 +1,15 @@ +/** No-op implementation of the Telemetry interface. + + Always compiled (regardless of XRPL_ENABLE_TELEMETRY). Provides the + make_Telemetry() factory when telemetry is compiled out (#ifndef), which + unconditionally returns a NullTelemetry that does nothing. + + When XRPL_ENABLE_TELEMETRY IS defined, the OTel virtual methods + (getTracer, startSpan) return noop tracers/spans. The make_Telemetry() + factory in this file is not used in that case -- Telemetry.cpp provides + its own factory that can return the real TelemetryImpl. +*/ + #include #ifdef XRPL_ENABLE_TELEMETRY @@ -9,9 +21,15 @@ namespace telemetry { namespace { +/** No-op Telemetry that returns immediately from every method. + + Used as the sole implementation when XRPL_ENABLE_TELEMETRY is not + defined; when it is defined, Telemetry.cpp supplies its own enabled=0 fallback. +*/ class NullTelemetry : public Telemetry { - Setup setup_; + /** Retained configuration (unused, kept for diagnostic access). */ + Setup const setup_; public: explicit NullTelemetry(Setup const& setup) : setup_(setup) @@ -89,8 +107,9 @@ public: } // namespace -// When XRPL_ENABLE_TELEMETRY is off OR setup.enabled is false, -// return NullTelemetry +/** Factory used when XRPL_ENABLE_TELEMETRY is not defined. + Unconditionally returns a NullTelemetry instance. 
+*/ #ifndef XRPL_ENABLE_TELEMETRY std::unique_ptr make_Telemetry(Telemetry::Setup const& setup, beast::Journal) diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp index 685c7c556d..08f30cb723 100644 --- a/src/libxrpl/telemetry/Telemetry.cpp +++ b/src/libxrpl/telemetry/Telemetry.cpp @@ -1,3 +1,15 @@ +/** OpenTelemetry SDK implementation of the Telemetry interface. + + Compiled only when XRPL_ENABLE_TELEMETRY is defined (via CMake + telemetry=ON). Contains: + + - TelemetryImpl: configures the OTel SDK with an OTLP/HTTP exporter, + batch span processor, trace-ID-ratio sampler, and resource attributes. + - NullTelemetryOtel: no-op fallback used when telemetry is compiled in + but disabled at runtime (enabled=0 in config). + - make_Telemetry(): factory that selects the appropriate implementation. +*/ + #ifdef XRPL_ENABLE_TELEMETRY #include @@ -26,13 +38,16 @@ namespace trace_sdk = opentelemetry::sdk::trace; namespace otlp_http = opentelemetry::exporter::otlp; namespace resource = opentelemetry::sdk::resource; -// A no-op implementation used when XRPL_ENABLE_TELEMETRY is defined -// but setup.enabled is false. This lives in the anonymous namespace -// so there is no ODR conflict with the NullTelemetry in -// NullTelemetry.cpp (which also lives in its own anonymous namespace). +/** No-op implementation used when XRPL_ENABLE_TELEMETRY is defined but + setup.enabled is false at runtime. + + Lives in the anonymous namespace so there is no ODR conflict with the + NullTelemetry in NullTelemetry.cpp. +*/ class NullTelemetryOtel : public Telemetry { - Setup setup_; + /** Retained configuration (unused, kept for diagnostic access). */ + Setup const setup_; public: explicit NullTelemetryOtel(Setup const& setup) : setup_(setup) @@ -106,10 +121,24 @@ public: } }; +/** Full OTel SDK implementation that exports trace spans via OTLP/HTTP. 
+ + Configures an OTLP/HTTP exporter, batch span processor, + TraceIdRatioBasedSampler, and resource attributes on start(). +*/ class TelemetryImpl : public Telemetry { - Setup setup_; - beast::Journal journal_; + /** Configuration from the [telemetry] config section. */ + Setup const setup_; + + /** Journal used for log output during start/stop. */ + beast::Journal const journal_; + + /** The SDK TracerProvider that owns the export pipeline. + + Held as std::shared_ptr so we can call ForceFlush() on shutdown. + Wrapped in a nostd::shared_ptr when registered as the global provider. + */ std::shared_ptr sdkProvider_; public: diff --git a/src/libxrpl/telemetry/TelemetryConfig.cpp b/src/libxrpl/telemetry/TelemetryConfig.cpp index bb834bef52..5cbbfa6a81 100644 --- a/src/libxrpl/telemetry/TelemetryConfig.cpp +++ b/src/libxrpl/telemetry/TelemetryConfig.cpp @@ -1,3 +1,12 @@ +/** Parser for the [telemetry] section of xrpld.cfg. + + Reads configuration values from the config file and populates a + Telemetry::Setup struct. All options have sensible defaults so the + section can be minimal or omitted entirely. + + See cfg/xrpld-example.cfg for the full list of available options. +*/ + #include namespace xrpl { diff --git a/src/xrpld/telemetry/TracingInstrumentation.h b/src/xrpld/telemetry/TracingInstrumentation.h index 45838bba0a..6baf1598e3 100644 --- a/src/xrpld/telemetry/TracingInstrumentation.h +++ b/src/xrpld/telemetry/TracingInstrumentation.h @@ -1,5 +1,23 @@ #pragma once +/** Convenience macros for instrumenting code with OpenTelemetry trace spans. + + When XRPL_ENABLE_TELEMETRY is defined, the macros create SpanGuard objects + that manage span lifetime via RAII. When not defined, all macros expand to + ((void)0) with zero overhead. + + Usage in instrumented code: + @code + XRPL_TRACE_RPC(app.getTelemetry(), "rpc.command." 
+ name); + XRPL_TRACE_SET_ATTR("xrpl.rpc.command", name); + XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success"); + @endcode + + @note Macro parameter names use leading/trailing underscores + (e.g. _tel_obj_) to avoid colliding with identifiers in the macro body, + specifically the ::xrpl::telemetry:: namespace qualifier. +*/ + #ifdef XRPL_ENABLE_TELEMETRY #include @@ -10,44 +28,67 @@ namespace xrpl { namespace telemetry { -// Start a span that is automatically ended when guard goes out of scope +/** Start an unconditional span, ended when the guard goes out of scope. + @param _tel_obj_ Telemetry instance reference. + @param _span_name_ Span name string. +*/ #define XRPL_TRACE_SPAN(_tel_obj_, _span_name_) \ auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_); \ ::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_) -// Start a span with specific kind +/** Start an unconditional span with a specific SpanKind. + @param _tel_obj_ Telemetry instance reference. + @param _span_name_ Span name string. + @param _span_kind_ opentelemetry::trace::SpanKind value. +*/ #define XRPL_TRACE_SPAN_KIND(_tel_obj_, _span_name_, _span_kind_) \ auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_, _span_kind_); \ ::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_) -// Conditional span for RPC tracing +/** Conditionally start a span for RPC tracing. + The span is only created if shouldTraceRpc() returns true. + @param _tel_obj_ Telemetry instance reference. + @param _span_name_ Span name string. +*/ #define XRPL_TRACE_RPC(_tel_obj_, _span_name_) \ std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \ if ((_tel_obj_).shouldTraceRpc()) { \ _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \ } -// Conditional span for transaction tracing +/** Conditionally start a span for transaction tracing. + The span is only created if shouldTraceTransactions() returns true. + @param _tel_obj_ Telemetry instance reference. + @param _span_name_ Span name string. 
+*/ #define XRPL_TRACE_TX(_tel_obj_, _span_name_) \ std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \ if ((_tel_obj_).shouldTraceTransactions()) { \ _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \ } -// Conditional span for consensus tracing +/** Conditionally start a span for consensus tracing. + The span is only created if shouldTraceConsensus() returns true. + @param _tel_obj_ Telemetry instance reference. + @param _span_name_ Span name string. +*/ #define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_) \ std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \ if ((_tel_obj_).shouldTraceConsensus()) { \ _xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \ } -// Set attribute on current span (if exists) +/** Set a key-value attribute on the current span (if it exists). + Use only after XRPL_TRACE_RPC/_TX/_CONSENSUS (needs their std::optional guard). +*/ #define XRPL_TRACE_SET_ATTR(key, value) \ if (_xrpl_guard_.has_value()) { \ _xrpl_guard_->setAttribute(key, value); \ } -// Record exception on current span +/** Record an exception on the current span and mark it as error. + Use only after XRPL_TRACE_RPC/_TX/_CONSENSUS (needs their std::optional guard). +*/ #define XRPL_TRACE_EXCEPTION(e) \ if (_xrpl_guard_.has_value()) { \ _xrpl_guard_->recordException(e); \