From 1c72714e02f1dfc36a7a8a654bfbb20012c8ee00 Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:17:27 +0000 Subject: [PATCH] Phase 1b: Telemetry core infrastructure Add the OpenTelemetry telemetry library and supporting infrastructure: Build system: - Conan opentelemetry-cpp dependency with OTLP/gRPC exporter - CMake integration for xrpl_telemetry library target - Levelization ordering updates Core library (libxrpl): - Telemetry class: provider lifecycle, span creation, sampling config - SpanGuard: RAII span management with attribute/exception helpers - TelemetryConfig: parse [telemetry] config section - NullTelemetry: no-op implementation when telemetry is disabled Application integration: - Telemetry member in ApplicationImp with start/stop lifecycle - getTelemetry() interface on Application - ServiceRegistry telemetry accessor Docker observability stack: - OTel Collector, Jaeger, Grafana docker-compose setup - Collector config with OTLP gRPC receiver and Jaeger exporter Config and docs: - Example telemetry config section in xrpld-example.cfg - Build documentation for telemetry setup Co-Authored-By: Claude Opus 4.6 --- .../scripts/levelization/results/ordering.txt | 4 + CMakeLists.txt | 12 + cfg/xrpld-example.cfg | 43 +++ cmake/XrplCore.cmake | 12 + conan.lock | 11 +- conanfile.py | 9 + docker/telemetry/docker-compose.yml | 60 ++++ .../provisioning/datasources/jaeger.yaml | 12 + docker/telemetry/otel-collector-config.yaml | 33 +++ docs/build/telemetry.md | 278 +++++++++++++++++ include/xrpl/core/ServiceRegistry.h | 6 + include/xrpl/telemetry/SpanGuard.h | 155 ++++++++++ include/xrpl/telemetry/Telemetry.h | 209 +++++++++++++ src/libxrpl/telemetry/NullTelemetry.cpp | 121 ++++++++ src/libxrpl/telemetry/Telemetry.cpp | 279 ++++++++++++++++++ src/libxrpl/telemetry/TelemetryConfig.cpp | 52 ++++ src/xrpld/app/main/Application.cpp | 21 +- 17 files changed, 1313 insertions(+), 4 deletions(-) create mode 100644 docker/telemetry/docker-compose.yml create mode 100644 docker/telemetry/grafana/provisioning/datasources/jaeger.yaml create mode 100644 docker/telemetry/otel-collector-config.yaml create mode 100644 docs/build/telemetry.md create mode 100644 include/xrpl/telemetry/SpanGuard.h create mode 100644 include/xrpl/telemetry/Telemetry.h create mode 100644 src/libxrpl/telemetry/NullTelemetry.cpp create mode 100644 src/libxrpl/telemetry/Telemetry.cpp create mode 100644 src/libxrpl/telemetry/TelemetryConfig.cpp diff --git a/.github/scripts/levelization/results/ordering.txt b/.github/scripts/levelization/results/ordering.txt index d4d07eee94..6e4a3a21a1 100644 --- a/.github/scripts/levelization/results/ordering.txt +++ b/.github/scripts/levelization/results/ordering.txt @@ -32,6 +32,8 @@ libxrpl.server > xrpl.server libxrpl.shamap > xrpl.basics libxrpl.shamap > xrpl.protocol libxrpl.shamap > xrpl.shamap +libxrpl.telemetry > xrpl.basics +libxrpl.telemetry > xrpl.telemetry libxrpl.tx > xrpl.basics libxrpl.tx > xrpl.conditions libxrpl.tx > xrpl.core @@ -206,6 +208,7 @@ xrpl.server > xrpl.shamap xrpl.shamap > xrpl.basics xrpl.shamap > xrpl.nodestore xrpl.shamap > xrpl.protocol +xrpl.telemetry > xrpl.basics xrpl.tx > xrpl.basics xrpl.tx > xrpl.core xrpl.tx > xrpl.ledger @@ -224,6 +227,7 @@ xrpld.app > xrpl.rdb xrpld.app > xrpl.resource xrpld.app > xrpl.server xrpld.app > xrpl.shamap +xrpld.app > xrpl.telemetry xrpld.app > xrpl.tx xrpld.consensus > xrpl.basics xrpld.consensus > xrpl.json diff --git a/CMakeLists.txt b/CMakeLists.txt index 33f68451c5..584c373bbc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,6 +117,18 @@ if(rocksdb) target_link_libraries(xrpl_libs INTERFACE RocksDB::rocksdb) endif() +# OpenTelemetry distributed tracing (optional). +# When ON, links against opentelemetry-cpp and defines XRPL_ENABLE_TELEMETRY +# so that tracing macros in TracingInstrumentation.h are compiled in. +# When OFF (default), all tracing code compiles to no-ops with zero overhead. +# Enable via: conan install -o telemetry=True, or cmake -Dtelemetry=ON. +option(telemetry "Enable OpenTelemetry tracing" OFF) +if (telemetry) + find_package(opentelemetry-cpp CONFIG REQUIRED) + add_compile_definitions(XRPL_ENABLE_TELEMETRY) + message(STATUS "OpenTelemetry tracing enabled") +endif () + # Work around changes to Conan recipe for now. if(TARGET nudb::core) set(nudb nudb::core) diff --git a/cfg/xrpld-example.cfg b/cfg/xrpld-example.cfg index 995d4e65ff..3e234b439a 100644 --- a/cfg/xrpld-example.cfg +++ b/cfg/xrpld-example.cfg @@ -1529,3 +1529,46 @@ validators.txt # set to ssl_verify to 0. [ssl_verify] 1 +#------------------------------------------------------------------------------- +# +# 11. Telemetry (OpenTelemetry Tracing) +# +#------------------------------------------------------------------------------- +# +# Enables distributed tracing via OpenTelemetry. Requires building with +# -DXRPL_ENABLE_TELEMETRY=ON (telemetry Conan option). +# +# [telemetry] +# +# enabled=0 +# +# Enable or disable telemetry at runtime. Default: 0 (disabled). +# +# endpoint=http://localhost:4318/v1/traces +# +# The OpenTelemetry Collector endpoint (OTLP/HTTP). Default: http://localhost:4318/v1/traces. +# +# exporter=otlp_http +# +# Exporter type: otlp_http. Default: otlp_http. +# +# sampling_ratio=1.0 +# +# Fraction of traces to sample (0.0 to 1.0). Default: 1.0 (all traces). +# +# trace_rpc=1 +# +# Enable RPC request tracing. Default: 1. +# +# trace_transactions=1 +# +# Enable transaction lifecycle tracing. Default: 1. +# +# trace_consensus=1 +# +# Enable consensus round tracing. Default: 1. +# +# trace_peer=0 +# +# Enable peer message tracing (high volume). Default: 0. +# diff --git a/cmake/XrplCore.cmake b/cmake/XrplCore.cmake index 580c4155eb..d9df4023bc 100644 --- a/cmake/XrplCore.cmake +++ b/cmake/XrplCore.cmake @@ -180,6 +180,17 @@ target_link_libraries( add_module(xrpl tx) target_link_libraries(xrpl.libxrpl.tx PUBLIC xrpl.libxrpl.ledger) +# Telemetry module — OpenTelemetry distributed tracing support. +# Sources: include/xrpl/telemetry/ (headers), src/libxrpl/telemetry/ (impl). +# When telemetry=ON, links the Conan-provided umbrella target +# opentelemetry-cpp::opentelemetry-cpp (individual component targets like +# ::api, ::sdk are not available in the Conan package). +add_module(xrpl telemetry) +target_link_libraries(xrpl.libxrpl.telemetry PUBLIC xrpl.libxrpl.basics xrpl.libxrpl.beast) +if (telemetry) + target_link_libraries(xrpl.libxrpl.telemetry PUBLIC opentelemetry-cpp::opentelemetry-cpp) +endif () + add_library(xrpl.libxrpl) set_target_properties(xrpl.libxrpl PROPERTIES OUTPUT_NAME xrpl) @@ -210,6 +221,7 @@ target_link_modules( resource server shamap + telemetry tx ) diff --git a/conan.lock b/conan.lock index 900d3526e1..39c612f00e 100644 --- a/conan.lock +++ b/conan.lock @@ -10,10 +10,13 @@ "rocksdb/10.5.1#4a197eca381a3e5ae8adf8cffa5aacd0%1765850186.86", "re2/20230301#ca3b241baec15bd31ea9187150e0b333%1765850148.103", "protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038", - "openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1769599205.414", + "opentelemetry-cpp/1.18.0#efd9851e173f8a13b9c7d35232de8cf1%1750409186.472", + "openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1770229825.601", "nudb/2.0.9#0432758a24204da08fee953ec9ea03cb%1769436073.32", + "nlohmann_json/3.11.3#45828be26eb619a2e04ca517bb7b828d%1701220705.259", "lz4/1.10.0#59fc63cac7f10fbe8e05c7e62c2f3504%1765850143.914", "libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1765842973.492", + "libcurl/8.18.0#364bc3755cb9ef84ed9a7ae9c7efc1c1%1770984390.024", "libbacktrace/cci.20210118#a7691bfccd8caaf66309df196790a5a1%1765842973.03", "libarchive/3.8.1#ffee18995c706e02bf96e7a2f7042e0d%1765850144.736", "jemalloc/5.3.0#e951da9cf599e956cebc117880d2d9f8%1729241615.244", @@ -30,9 +33,15 @@ "zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1765850150.075", "strawberryperl/5.32.1.1#707032463aa0620fa17ec0d887f5fe41%1765850165.196", "protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038", + "pkgconf/2.5.1#93c2051284cba1279494a43a4fcfeae2%1757684701.089", + "opentelemetry-proto/1.4.0#4096a3b05916675ef9628f3ffd571f51%1732731336.11", + "ninja/1.13.2#c8c5dc2a52ed6e4e42a66d75b4717ceb%1764096931.974", "nasm/2.16.01#31e26f2ee3c4346ecd347911bd126904%1765850144.707", "msys2/cci.latest#eea83308ad7e9023f7318c60d5a9e6cb%1770199879.083", + "meson/1.10.0#60786758ea978964c24525de19603cf4%1768294926.103", "m4/1.4.19#70dc8bbb33e981d119d2acc0175cf381%1763158052.846", + "libtool/2.4.7#14e7739cc128bc1623d2ed318008e47e%1755679003.847", + "gnu-config/cci.20210814#466e9d4d7779e1c142443f7ea44b4284%1762363589.329", "cmake/4.2.0#ae0a44f44a1ef9ab68fd4b3e9a1f8671%1765850153.937", "cmake/3.31.10#313d16a1aa16bbdb2ca0792467214b76%1765850153.479", "b2/5.3.3#107c15377719889654eb9a162a673975%1765850144.355", diff --git a/conanfile.py b/conanfile.py index 7300b537d9..be896bf1d3 100644 --- a/conanfile.py +++ b/conanfile.py @@ -22,6 +22,7 @@ class Xrpl(ConanFile): "rocksdb": [True, False], "shared": [True, False], "static": [True, False], + "telemetry": [True, False], "tests": [True, False], "unity": [True, False], "xrpld": [True, False], @@ -54,6 +55,7 @@ class Xrpl(ConanFile): "rocksdb": True, "shared": False, "static": True, + "telemetry": True, "tests": False, "unity": False, "xrpld": False, @@ -140,6 +142,10 @@ class Xrpl(ConanFile): self.requires("jemalloc/5.3.0") if self.options.rocksdb: self.requires("rocksdb/10.5.1") + # OpenTelemetry C++ SDK for distributed tracing (optional). + # Provides OTLP/HTTP exporter, batch span processor, and trace API. + if self.options.telemetry: + self.requires("opentelemetry-cpp/1.18.0") self.requires("xxhash/0.8.3", **transitive_headers_opt) exports_sources = ( @@ -168,6 +174,7 @@ class Xrpl(ConanFile): tc.variables["rocksdb"] = self.options.rocksdb tc.variables["BUILD_SHARED_LIBS"] = self.options.shared tc.variables["static"] = self.options.static + tc.variables["telemetry"] = self.options.telemetry tc.variables["unity"] = self.options.unity tc.variables["xrpld"] = self.options.xrpld tc.generate() @@ -220,3 +227,5 @@ class Xrpl(ConanFile): ] if self.options.rocksdb: libxrpl.requires.append("rocksdb::librocksdb") + if self.options.telemetry: + libxrpl.requires.append("opentelemetry-cpp::opentelemetry-cpp") diff --git a/docker/telemetry/docker-compose.yml b/docker/telemetry/docker-compose.yml new file mode 100644 index 0000000000..e6780ac2c3 --- /dev/null +++ b/docker/telemetry/docker-compose.yml @@ -0,0 +1,60 @@ +# Docker Compose stack for rippled OpenTelemetry observability. +# +# Provides three services for local development: +# - otel-collector: receives OTLP traces from rippled, batches and +# forwards them to Jaeger. Listens on ports 4317 (gRPC) and 4318 (HTTP). +# - jaeger: all-in-one tracing backend with UI on port 16686. +# - grafana: dashboards on port 3000, pre-configured with Jaeger datasource. +# +# Usage: +# docker compose -f docker/telemetry/docker-compose.yml up -d +# +# Configure rippled to export traces by adding to xrpld.cfg: +# [telemetry] +# enabled=1 +# endpoint=http://localhost:4318/v1/traces + +version: "3.8" + +services: + otel-collector: + image: otel/opentelemetry-collector-contrib:latest + command: ["--config=/etc/otel-collector-config.yaml"] + ports: + - "4317:4317" # OTLP gRPC + - "4318:4318" # OTLP HTTP + - "13133:13133" # Health check + volumes: + - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro + depends_on: + - jaeger + networks: + - rippled-telemetry + + jaeger: + image: jaegertracing/all-in-one:latest + environment: + - COLLECTOR_OTLP_ENABLED=true + ports: + - "16686:16686" # Jaeger UI + - "14250:14250" # gRPC + networks: + - rippled-telemetry + + grafana: + image: grafana/grafana:latest + environment: + - GF_AUTH_ANONYMOUS_ENABLED=true + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + ports: + - "3000:3000" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + depends_on: + - jaeger + networks: + - rippled-telemetry + +networks: + rippled-telemetry: + driver: bridge diff --git a/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml b/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml new file mode 100644 index 0000000000..e410cb854b --- /dev/null +++ b/docker/telemetry/grafana/provisioning/datasources/jaeger.yaml @@ -0,0 +1,12 @@ +# Grafana datasource provisioning for the rippled telemetry stack. +# Auto-configures Jaeger as a trace data source on Grafana startup. +# Access Grafana at http://localhost:3000, then use Explore -> Jaeger +# to browse rippled traces. + +apiVersion: 1 + +datasources: + - name: Jaeger + type: jaeger + access: proxy + url: http://jaeger:16686 diff --git a/docker/telemetry/otel-collector-config.yaml b/docker/telemetry/otel-collector-config.yaml new file mode 100644 index 0000000000..0231023969 --- /dev/null +++ b/docker/telemetry/otel-collector-config.yaml @@ -0,0 +1,33 @@ +# OpenTelemetry Collector configuration for rippled development. +# +# Pipeline: OTLP receiver -> batch processor -> debug exporter + Jaeger. +# rippled sends traces via OTLP/HTTP to port 4318. The collector batches +# them and forwards to Jaeger via OTLP/gRPC on the Docker network. + +receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + +processors: + batch: + timeout: 1s + send_batch_size: 100 + +exporters: + debug: + verbosity: detailed + otlp/jaeger: + endpoint: jaeger:4317 + tls: + insecure: true + +service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [debug, otlp/jaeger] diff --git a/docs/build/telemetry.md b/docs/build/telemetry.md new file mode 100644 index 0000000000..8f6b6755e2 --- /dev/null +++ b/docs/build/telemetry.md @@ -0,0 +1,278 @@ +# OpenTelemetry Tracing for Rippled + +This document explains how to build rippled with OpenTelemetry distributed tracing support, configure the runtime telemetry options, and set up the observability backend to view traces. + +- [OpenTelemetry Tracing for Rippled](#opentelemetry-tracing-for-rippled) + - [Overview](#overview) + - [Building with Telemetry](#building-with-telemetry) + - [Summary](#summary) + - [Build steps](#build-steps) + - [Install dependencies](#install-dependencies) + - [Call CMake](#call-cmake) + - [Build](#build) + - [Building without telemetry](#building-without-telemetry) + - [Runtime Configuration](#runtime-configuration) + - [Configuration options](#configuration-options) + - [Observability Stack](#observability-stack) + - [Start the stack](#start-the-stack) + - [Verify the stack](#verify-the-stack) + - [View traces in Jaeger](#view-traces-in-jaeger) + - [Running Tests](#running-tests) + - [Troubleshooting](#troubleshooting) + - [No traces appear in Jaeger](#no-traces-appear-in-jaeger) + - [Conan lockfile error](#conan-lockfile-error) + - [CMake target not found](#cmake-target-not-found) + - [Architecture](#architecture) + - [Key files](#key-files) + - [Conditional compilation](#conditional-compilation) + +## Overview + +Rippled supports optional [OpenTelemetry](https://opentelemetry.io/) distributed tracing. +When enabled, it instruments RPC requests with trace spans that are exported via +OTLP/HTTP to an OpenTelemetry Collector, which forwards them to a tracing backend +such as Jaeger. + +Telemetry is **off by default** at both compile time and runtime: + +- **Compile time**: The Conan option `telemetry` and CMake option `telemetry` must be set to `True`/`ON`. + When disabled, all tracing macros compile to `((void)0)` with zero overhead. +- **Runtime**: The `[telemetry]` config section must set `enabled=1`. + When disabled at runtime, a no-op implementation is used. + +## Building with Telemetry + +### Summary + +Follow the same instructions as mentioned in [BUILD.md](../../BUILD.md) but with the following changes: + +1. Pass `-o telemetry=True` to `conan install` to pull the `opentelemetry-cpp` dependency. +2. CMake will automatically pick up `telemetry=ON` from the Conan-generated toolchain. +3. Build as usual. + +--- + +### Build steps + +```bash +cd /path/to/rippled +rm -rf .build +mkdir .build +cd .build +``` + +#### Install dependencies + +The `telemetry` option adds `opentelemetry-cpp/1.18.0` as a dependency. +If the Conan lockfile does not yet include this package, bypass it with `--lockfile=""`. + +```bash +conan install .. \ + --output-folder . \ + --build missing \ + --settings build_type=Debug \ + -o telemetry=True \ + -o tests=True \ + -o xrpld=True \ + --lockfile="" +``` + +> **Note**: The first build with telemetry may take longer as `opentelemetry-cpp` +> and its transitive dependencies are compiled from source. + +#### Call CMake + +The Conan-generated toolchain file sets `telemetry=ON` automatically. +No additional CMake flags are needed beyond the standard ones. + +```bash +cmake .. -G Ninja \ + -DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \ + -DCMAKE_BUILD_TYPE=Debug \ + -Dtests=ON -Dxrpld=ON +``` + +You should see in the CMake output: + +``` +-- OpenTelemetry tracing enabled +``` + +#### Build + +```bash +cmake --build . --parallel $(nproc) +``` + +### Building without telemetry + +Omit the `-o telemetry=True` option (or pass `-o telemetry=False`). +The `opentelemetry-cpp` dependency will not be downloaded, +the `XRPL_ENABLE_TELEMETRY` preprocessor define will not be set, +and all tracing macros will compile to no-ops. +The resulting binary is identical to one built before telemetry support was added. + +## Runtime Configuration + +Add a `[telemetry]` section to your `xrpld.cfg` file: + +```ini +[telemetry] +enabled=1 +service_name=rippled +endpoint=http://localhost:4318/v1/traces +sampling_ratio=1.0 +trace_rpc=1 +trace_transactions=1 +trace_consensus=1 +trace_peer=0 +``` + +### Configuration options + +| Option | Type | Default | Description | +| --------------------- | ------ | --------------------------------- | -------------------------------------------------- | +| `enabled` | int | `0` | Enable (`1`) or disable (`0`) telemetry at runtime | +| `service_name` | string | `rippled` | Service name reported in traces | +| `service_instance_id` | string | node public key | Unique instance identifier | +| `exporter` | string | `otlp_http` | Exporter type | +| `endpoint` | string | `http://localhost:4318/v1/traces` | OTLP/HTTP collector endpoint | +| `use_tls` | int | `0` | Enable TLS for the exporter connection | +| `tls_ca_cert` | string | (empty) | Path to CA certificate for TLS | +| `sampling_ratio` | double | `1.0` | Fraction of traces to sample (`0.0` to `1.0`) | +| `batch_size` | uint32 | `512` | Maximum spans per export batch | +| `batch_delay_ms` | uint32 | `5000` | Maximum delay (ms) before flushing a batch | +| `max_queue_size` | uint32 | `2048` | Maximum spans queued in memory | +| `trace_rpc` | int | `1` | Enable RPC request tracing | +| `trace_transactions` | int | `1` | Enable transaction lifecycle tracing | +| `trace_consensus` | int | `1` | Enable consensus round tracing | +| `trace_peer` | int | `0` | Enable peer message tracing (high volume) | +| `trace_ledger` | int | `1` | Enable ledger close tracing | + +## Observability Stack + +A Docker Compose stack is provided in `docker/telemetry/` with three services: + +| Service | Port | Purpose | +| ------------------ | ---------------------------------------------- | ---------------------------------------------------- | +| **OTel Collector** | `4317` (gRPC), `4318` (HTTP), `13133` (health) | Receives OTLP spans, batches, and forwards to Jaeger | +| **Jaeger** | `16686` (UI) | Trace storage and visualization | +| **Grafana** | `3000` | Dashboards (Jaeger pre-configured as datasource) | + +### Start the stack + +```bash +docker compose -f docker/telemetry/docker-compose.yml up -d +``` + +### Verify the stack + +```bash +# Collector health +curl http://localhost:13133 + +# Jaeger UI +open http://localhost:16686 + +# Grafana +open http://localhost:3000 +``` + +### View traces in Jaeger + +1. Open `http://localhost:16686` in a browser. +2. Select the service name (e.g. `rippled`) from the **Service** dropdown. +3. Click **Find Traces**. +4. Click into any trace to see the span tree and attributes. + +Traced RPC operations produce a span hierarchy like: + +``` +rpc.request + └── rpc.command.server_info (xrpl.rpc.command=server_info, xrpl.rpc.status=success) +``` + +Each span includes attributes: + +- `xrpl.rpc.command` — the RPC method name +- `xrpl.rpc.version` — API version +- `xrpl.rpc.role` — `admin` or `user` +- `xrpl.rpc.status` — `success` or `error` + +## Running Tests + +Unit tests run with the telemetry-enabled build regardless of whether the +observability stack is running. When no collector is available, the exporter +silently drops spans with no impact on test results. + +```bash +# Run all RPC tests +./xrpld --unittest=RPCCall,ServerInfo,AccountTx,LedgerRPC,Transaction --unittest-jobs $(nproc) + +# Run the full test suite +./xrpld --unittest --unittest-jobs $(nproc) +``` + +To generate traces during manual testing, start rippled in standalone mode: + +```bash +./xrpld --conf /path/to/xrpld.cfg --standalone --start +``` + +Then send RPC requests: + +```bash +curl -s -X POST http://127.0.0.1:5005/ \ + -H "Content-Type: application/json" \ + -d '{"method":"server_info","params":[{}]}' +``` + +## Troubleshooting + +### No traces appear in Jaeger + +1. Confirm the OTel Collector is running: `docker compose -f docker/telemetry/docker-compose.yml ps` +2. Check collector logs for errors: `docker compose -f docker/telemetry/docker-compose.yml logs otel-collector` +3. Confirm `[telemetry] enabled=1` is set in the rippled config. +4. Confirm `endpoint` points to the correct collector address (`http://localhost:4318/v1/traces`). +5. Wait for the batch delay to elapse (default `5000` ms) before checking Jaeger. + +### Conan lockfile error + +If you see `ERROR: Requirement 'opentelemetry-cpp/1.18.0' not in lockfile 'requires'`, +the lockfile was generated without the telemetry dependency. +Pass `--lockfile=""` to bypass the lockfile, or regenerate it with telemetry enabled. + +### CMake target not found + +If CMake reports that `opentelemetry-cpp` targets are not found, +ensure you ran `conan install` with `-o telemetry=True` and that the +Conan-generated toolchain file is being used. +The Conan package provides a single umbrella target +`opentelemetry-cpp::opentelemetry-cpp` (not individual component targets). + +## Architecture + +### Key files + +| File | Purpose | +| ---------------------------------------------- | ----------------------------------------------------------- | +| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct | +| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard (activates scope, ends span on destruction) | +| `src/libxrpl/telemetry/Telemetry.cpp` | OTel-backed implementation (`TelemetryImpl`) | +| `src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) | +| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) | +| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) | +| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation | +| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation | +| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Jaeger + Grafana) | +| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration | + +### Conditional compilation + +All OpenTelemetry SDK headers are guarded behind `#ifdef XRPL_ENABLE_TELEMETRY`. +The instrumentation macros in `TracingInstrumentation.h` compile to `((void)0)` when +the define is absent. +At runtime, if `enabled=0` is set in config (or the section is omitted), a +`NullTelemetry` implementation is used that returns no-op spans. +This two-layer approach ensures zero overhead when telemetry is not wanted. diff --git a/include/xrpl/core/ServiceRegistry.h b/include/xrpl/core/ServiceRegistry.h index 1a2e33d5ee..200b1bf59c 100644 --- a/include/xrpl/core/ServiceRegistry.h +++ b/include/xrpl/core/ServiceRegistry.h @@ -19,6 +19,9 @@ class Manager; namespace perf { class PerfLog; } +namespace telemetry { +class Telemetry; +} // This is temporary until we migrate all code to use ServiceRegistry. class Application; @@ -205,6 +208,9 @@ public: virtual perf::PerfLog& getPerfLog() = 0; + virtual telemetry::Telemetry& + getTelemetry() = 0; + // Configuration and state virtual bool isStopping() const = 0; diff --git a/include/xrpl/telemetry/SpanGuard.h b/include/xrpl/telemetry/SpanGuard.h new file mode 100644 index 0000000000..07ad8e9ae7 --- /dev/null +++ b/include/xrpl/telemetry/SpanGuard.h @@ -0,0 +1,155 @@ +#pragma once + +/** RAII guard for OpenTelemetry trace spans. + + Wraps an OTel Span and Scope together. On construction, the span is + activated on the current thread's context (via Scope). On destruction, + the span is ended and the previous context is restored. + + Used by the XRPL_TRACE_* macros in TracingInstrumentation.h. Can also + be stored in std::optional for conditional tracing (move-constructible). + + Only compiled when XRPL_ENABLE_TELEMETRY is defined. +*/ + +#ifdef XRPL_ENABLE_TELEMETRY + +#include +#include +#include +#include + +#include +#include + +namespace xrpl { +namespace telemetry { + +/** RAII wrapper that activates a span on construction and ends it on + destruction. Non-copyable but move-constructible so it can be held + in std::optional for conditional tracing. +*/ +class SpanGuard +{ + /** The OTel span being guarded. Set to nullptr after move. */ + opentelemetry::nostd::shared_ptr span_; + + /** Scope that activates span_ on the current thread's context stack. */ + opentelemetry::trace::Scope scope_; + +public: + /** Construct a guard that activates @p span on the current context. + + @param span The span to guard. Ended in the destructor. + */ + explicit SpanGuard(opentelemetry::nostd::shared_ptr span) + : span_(std::move(span)), scope_(span_) + { + } + + /** Non-copyable. Move-constructible to support std::optional. + + The move constructor creates a new Scope from the transferred span, + because Scope is not movable. + */ + SpanGuard(SpanGuard const&) = delete; + SpanGuard& + operator=(SpanGuard const&) = delete; + SpanGuard(SpanGuard&& other) noexcept : span_(std::move(other.span_)), scope_(span_) + { + other.span_ = nullptr; + } + SpanGuard& + operator=(SpanGuard&&) = delete; + + ~SpanGuard() + { + if (span_) + span_->End(); + } + + /** @return A mutable reference to the underlying span. */ + opentelemetry::trace::Span& + span() + { + return *span_; + } + + /** @return A const reference to the underlying span. */ + opentelemetry::trace::Span const& + span() const + { + return *span_; + } + + /** Mark the span status as OK. */ + void + setOk() + { + span_->SetStatus(opentelemetry::trace::StatusCode::kOk); + } + + /** Set an explicit status code on the span. + + @param code The OTel status code. + @param description Optional human-readable status description. + */ + void + setStatus(opentelemetry::trace::StatusCode code, std::string_view description = "") + { + span_->SetStatus(code, std::string(description)); + } + + /** Set a key-value attribute on the span. + + @param key Attribute name (e.g. "xrpl.rpc.command"). + @param value Attribute value (string, int, bool, etc.). + */ + template + void + setAttribute(std::string_view key, T&& value) + { + span_->SetAttribute( + opentelemetry::nostd::string_view(key.data(), key.size()), std::forward(value)); + } + + /** Add a named event to the span's timeline. + + @param name Event name. + */ + void + addEvent(std::string_view name) + { + span_->AddEvent(std::string(name)); + } + + /** Record an exception as a span event following OTel semantic + conventions, and mark the span status as error. + + @param e The exception to record. + */ + void + recordException(std::exception const& e) + { + span_->AddEvent( + "exception", + {{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}}); + span_->SetStatus(opentelemetry::trace::StatusCode::kError, e.what()); + } + + /** Return the current OTel context. + + Useful for creating child spans on a different thread by passing + this context to Telemetry::startSpan(name, parentContext). + */ + opentelemetry::context::Context + context() const + { + return opentelemetry::context::RuntimeContext::GetCurrent(); + } +}; + +} // namespace telemetry +} // namespace xrpl + +#endif // XRPL_ENABLE_TELEMETRY diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h new file mode 100644 index 0000000000..b237f4a621 --- /dev/null +++ b/include/xrpl/telemetry/Telemetry.h @@ -0,0 +1,209 @@ +#pragma once + +/** Abstract interface for OpenTelemetry distributed tracing. + + Provides the Telemetry base class that all components use to create trace + spans. Two implementations exist: + + - TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled + only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime. + - NullTelemetry (NullTelemetry.cpp): no-op stub used when telemetry is + disabled at compile time or runtime. + + The Setup struct holds all configuration parsed from the [telemetry] + section of xrpld.cfg. See TelemetryConfig.cpp for the parser and + cfg/xrpld-example.cfg for the available options. + + OTel SDK headers are conditionally included behind XRPL_ENABLE_TELEMETRY + so that builds without telemetry have zero dependency on opentelemetry-cpp. +*/ + +#include +#include + +#include +#include +#include +#include + +#ifdef XRPL_ENABLE_TELEMETRY +#include +#include +#include +#include +#endif + +namespace xrpl { +namespace telemetry { + +class Telemetry +{ +public: + /** Configuration parsed from the [telemetry] section of xrpld.cfg. + + All fields have sensible defaults so the section can be minimal + or omitted entirely. See TelemetryConfig.cpp for the parser. + */ + struct Setup + { + /** Master switch: true to enable tracing at runtime. */ + bool enabled = false; + + /** OTel resource attribute `service.name`. */ + std::string serviceName = "rippled"; + + /** OTel resource attribute `service.version` (set from BuildInfo). */ + std::string serviceVersion; + + /** OTel resource attribute `service.instance.id` (defaults to node + public key). */ + std::string serviceInstanceId; + + /** Exporter type: currently only "otlp_http" is supported. */ + std::string exporterType = "otlp_http"; + + /** OTLP/HTTP endpoint URL where spans are sent. */ + std::string exporterEndpoint = "http://localhost:4318/v1/traces"; + + /** Whether to use TLS for the exporter connection. */ + bool useTls = false; + + /** Path to a CA certificate bundle for TLS verification. */ + std::string tlsCertPath; + + /** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything. */ + double samplingRatio = 1.0; + + /** Maximum number of spans per batch export. */ + std::uint32_t batchSize = 512; + + /** Delay between batch exports. */ + std::chrono::milliseconds batchDelay{5000}; + + /** Maximum number of spans queued before dropping. */ + std::uint32_t maxQueueSize = 2048; + + /** Network identifier, added as an OTel resource attribute. */ + std::uint32_t networkId = 0; + + /** Network type label (e.g. "mainnet", "testnet", "devnet"). */ + std::string networkType = "mainnet"; + + /** Enable tracing for transaction processing. */ + bool traceTransactions = true; + + /** Enable tracing for consensus rounds. */ + bool traceConsensus = true; + + /** Enable tracing for RPC request handling. */ + bool traceRpc = true; + + /** Enable tracing for peer-to-peer messages (disabled by default + due to high volume). */ + bool tracePeer = false; + + /** Enable tracing for ledger close/accept. */ + bool traceLedger = true; + }; + + virtual ~Telemetry() = default; + + /** Initialize the tracing pipeline (exporter, processor, provider). + Call after construction. + */ + virtual void + start() = 0; + + /** Flush pending spans and shut down the tracing pipeline. + Call before destruction. + */ + virtual void + stop() = 0; + + /** @return true if this instance is actively exporting spans. */ + virtual bool + isEnabled() const = 0; + + /** @return true if transaction processing should be traced. */ + virtual bool + shouldTraceTransactions() const = 0; + + /** @return true if consensus rounds should be traced. */ + virtual bool + shouldTraceConsensus() const = 0; + + /** @return true if RPC request handling should be traced. */ + virtual bool + shouldTraceRpc() const = 0; + + /** @return true if peer-to-peer messages should be traced. */ + virtual bool + shouldTracePeer() const = 0; + +#ifdef XRPL_ENABLE_TELEMETRY + /** Get or create a named tracer instance. + + @param name Tracer name used to identify the instrumentation library. + @return A shared pointer to the Tracer. + */ + virtual opentelemetry::nostd::shared_ptr + getTracer(std::string_view name = "rippled") = 0; + + /** Start a new span on the current thread's context. + + The span becomes a child of the current active span (if any) via + OpenTelemetry's context propagation. + + @param name Span name (typically "rpc.command."). + @param kind The span kind (defaults to kInternal). + @return A shared pointer to the new Span. + */ + virtual opentelemetry::nostd::shared_ptr + startSpan( + std::string_view name, + opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0; + + /** Start a new span with an explicit parent context. + + Use this overload when the parent span is not on the current + thread's context stack (e.g. cross-thread trace propagation). + + @param name Span name. + @param parentContext The parent span's context. + @param kind The span kind (defaults to kInternal). + @return A shared pointer to the new Span. + */ + virtual opentelemetry::nostd::shared_ptr + startSpan( + std::string_view name, + opentelemetry::context::Context const& parentContext, + opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0; +#endif +}; + +/** Create a Telemetry instance. + + Returns a TelemetryImpl when setup.enabled is true, or a + NullTelemetry no-op stub otherwise. + + @param setup Configuration from the [telemetry] config section. + @param journal Journal for log output during initialization. +*/ +std::unique_ptr +make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal); + +/** Parse the [telemetry] config section into a Setup struct. + + @param section The [telemetry] config section. + @param nodePublicKey Node public key, used as default instance ID. + @param version Build version string. + @return A populated Setup struct with defaults for missing values. +*/ +Telemetry::Setup +setup_Telemetry( + Section const& section, + std::string const& nodePublicKey, + std::string const& version); + +} // namespace telemetry +} // namespace xrpl diff --git a/src/libxrpl/telemetry/NullTelemetry.cpp b/src/libxrpl/telemetry/NullTelemetry.cpp new file mode 100644 index 0000000000..faa81590cb --- /dev/null +++ b/src/libxrpl/telemetry/NullTelemetry.cpp @@ -0,0 +1,121 @@ +/** No-op implementation of the Telemetry interface. + + Always compiled (regardless of XRPL_ENABLE_TELEMETRY). Provides the + make_Telemetry() factory when telemetry is compiled out (#ifndef), which + unconditionally returns a NullTelemetry that does nothing. + + When XRPL_ENABLE_TELEMETRY IS defined, the OTel virtual methods + (getTracer, startSpan) return noop tracers/spans. The make_Telemetry() + factory in this file is not used in that case -- Telemetry.cpp provides + its own factory that can return the real TelemetryImpl. +*/ + +#include + +#ifdef XRPL_ENABLE_TELEMETRY +#include +#endif + +namespace xrpl { +namespace telemetry { + +namespace { + +/** No-op Telemetry that returns immediately from every method. + + Used as the sole implementation when XRPL_ENABLE_TELEMETRY is not + defined, or as a fallback when it is defined but enabled=0. +*/ +class NullTelemetry : public Telemetry +{ + /** Retained configuration (unused, kept for diagnostic access). */ + Setup const setup_; + +public: + explicit NullTelemetry(Setup const& setup) : setup_(setup) + { + } + + void + start() override + { + } + + void + stop() override + { + } + + bool + isEnabled() const override + { + return false; + } + + bool + shouldTraceTransactions() const override + { + return false; + } + + bool + shouldTraceConsensus() const override + { + return false; + } + + bool + shouldTraceRpc() const override + { + return false; + } + + bool + shouldTracePeer() const override + { + return false; + } + +#ifdef XRPL_ENABLE_TELEMETRY + opentelemetry::nostd::shared_ptr + getTracer(std::string_view) override + { + static auto noopTracer = opentelemetry::nostd::shared_ptr( + new opentelemetry::trace::NoopTracer()); + return noopTracer; + } + + opentelemetry::nostd::shared_ptr + startSpan(std::string_view, opentelemetry::trace::SpanKind) override + { + return opentelemetry::nostd::shared_ptr( + new opentelemetry::trace::NoopSpan(nullptr)); + } + + opentelemetry::nostd::shared_ptr + startSpan( + std::string_view, + opentelemetry::context::Context const&, + opentelemetry::trace::SpanKind) override + { + return opentelemetry::nostd::shared_ptr( + new opentelemetry::trace::NoopSpan(nullptr)); + } +#endif +}; + +} // namespace + +/** Factory used when XRPL_ENABLE_TELEMETRY is not defined. + Unconditionally returns a NullTelemetry instance. +*/ +#ifndef XRPL_ENABLE_TELEMETRY +std::unique_ptr +make_Telemetry(Telemetry::Setup const& setup, beast::Journal) +{ + return std::make_unique(setup); +} +#endif + +} // namespace telemetry +} // namespace xrpl diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp new file mode 100644 index 0000000000..f2d6fa39ed --- /dev/null +++ b/src/libxrpl/telemetry/Telemetry.cpp @@ -0,0 +1,279 @@ +/** OpenTelemetry SDK implementation of the Telemetry interface. + + Compiled only when XRPL_ENABLE_TELEMETRY is defined (via CMake + telemetry=ON). Contains: + + - TelemetryImpl: configures the OTel SDK with an OTLP/HTTP exporter, + batch span processor, trace-ID-ratio sampler, and resource attributes. + - NullTelemetryOtel: no-op fallback used when telemetry is compiled in + but disabled at runtime (enabled=0 in config). + - make_Telemetry(): factory that selects the appropriate implementation. +*/ + +#ifdef XRPL_ENABLE_TELEMETRY + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace xrpl { +namespace telemetry { + +namespace { + +namespace trace_api = opentelemetry::trace; +namespace trace_sdk = opentelemetry::sdk::trace; +namespace otlp_http = opentelemetry::exporter::otlp; +namespace resource = opentelemetry::sdk::resource; + +/** No-op implementation used when XRPL_ENABLE_TELEMETRY is defined but + setup.enabled is false at runtime. + + Lives in the anonymous namespace so there is no ODR conflict with the + NullTelemetry in NullTelemetry.cpp. +*/ +class NullTelemetryOtel : public Telemetry +{ + /** Retained configuration (unused, kept for diagnostic access). */ + Setup const setup_; + +public: + explicit NullTelemetryOtel(Setup const& setup) : setup_(setup) + { + } + + void + start() override + { + } + + void + stop() override + { + } + + bool + isEnabled() const override + { + return false; + } + + bool + shouldTraceTransactions() const override + { + return false; + } + + bool + shouldTraceConsensus() const override + { + return false; + } + + bool + shouldTraceRpc() const override + { + return false; + } + + bool + shouldTracePeer() const override + { + return false; + } + + opentelemetry::nostd::shared_ptr + getTracer(std::string_view) override + { + static auto noopTracer = + opentelemetry::nostd::shared_ptr(new trace_api::NoopTracer()); + return noopTracer; + } + + opentelemetry::nostd::shared_ptr + startSpan(std::string_view, trace_api::SpanKind) override + { + return opentelemetry::nostd::shared_ptr(new trace_api::NoopSpan(nullptr)); + } + + opentelemetry::nostd::shared_ptr + startSpan(std::string_view, opentelemetry::context::Context const&, trace_api::SpanKind) + override + { + return opentelemetry::nostd::shared_ptr(new trace_api::NoopSpan(nullptr)); + } +}; + +/** Full OTel SDK implementation that exports trace spans via OTLP/HTTP. + + Configures an OTLP/HTTP exporter, batch span processor, + TraceIdRatioBasedSampler, and resource attributes on start(). +*/ +class TelemetryImpl : public Telemetry +{ + /** Configuration from the [telemetry] config section. */ + Setup const setup_; + + /** Journal used for log output during start/stop. */ + beast::Journal const journal_; + + /** The SDK TracerProvider that owns the export pipeline. + + Held as std::shared_ptr so we can call ForceFlush() on shutdown. + Wrapped in a nostd::shared_ptr when registered as the global provider. + */ + std::shared_ptr sdkProvider_; + +public: + TelemetryImpl(Setup const& setup, beast::Journal journal) : setup_(setup), journal_(journal) + { + } + + void + start() override + { + JLOG(journal_.info()) << "Telemetry starting: endpoint=" << setup_.exporterEndpoint + << " sampling=" << setup_.samplingRatio; + + // Configure OTLP HTTP exporter + otlp_http::OtlpHttpExporterOptions exporterOpts; + exporterOpts.url = setup_.exporterEndpoint; + if (setup_.useTls) + exporterOpts.ssl_ca_cert_path = setup_.tlsCertPath; + + auto exporter = otlp_http::OtlpHttpExporterFactory::Create(exporterOpts); + + // Configure batch processor + trace_sdk::BatchSpanProcessorOptions processorOpts; + processorOpts.max_queue_size = setup_.maxQueueSize; + processorOpts.schedule_delay_millis = std::chrono::milliseconds(setup_.batchDelay); + processorOpts.max_export_batch_size = setup_.batchSize; + + auto processor = + trace_sdk::BatchSpanProcessorFactory::Create(std::move(exporter), processorOpts); + + // Configure resource attributes + auto resourceAttrs = resource::Resource::Create({ + {resource::SemanticConventions::kServiceName, setup_.serviceName}, + {resource::SemanticConventions::kServiceVersion, setup_.serviceVersion}, + {resource::SemanticConventions::kServiceInstanceId, setup_.serviceInstanceId}, + {"xrpl.network.id", static_cast(setup_.networkId)}, + {"xrpl.network.type", setup_.networkType}, + }); + + // Configure sampler + auto sampler = std::make_unique(setup_.samplingRatio); + + // Create TracerProvider + sdkProvider_ = trace_sdk::TracerProviderFactory::Create( + std::move(processor), resourceAttrs, std::move(sampler)); + + // Set as global provider + trace_api::Provider::SetTracerProvider( + opentelemetry::nostd::shared_ptr(sdkProvider_)); + + JLOG(journal_.info()) << "Telemetry started successfully"; + } + + void + stop() override + { + JLOG(journal_.info()) << "Telemetry stopping"; + if (sdkProvider_) + { + // Force flush before shutdown + sdkProvider_->ForceFlush(); + sdkProvider_.reset(); + trace_api::Provider::SetTracerProvider( + opentelemetry::nostd::shared_ptr( + new trace_api::NoopTracerProvider())); + } + JLOG(journal_.info()) << "Telemetry stopped"; + } + + bool + isEnabled() const override + { + return true; + } + + bool + shouldTraceTransactions() const override + { + return setup_.traceTransactions; + } + + bool + shouldTraceConsensus() const override + { + return setup_.traceConsensus; + } + + bool + shouldTraceRpc() const override + { + return setup_.traceRpc; + } + + bool + shouldTracePeer() const override + { + return setup_.tracePeer; + } + + opentelemetry::nostd::shared_ptr + getTracer(std::string_view name) override + { + if (!sdkProvider_) + return trace_api::Provider::GetTracerProvider()->GetTracer(std::string(name)); + return sdkProvider_->GetTracer(std::string(name)); + } + + opentelemetry::nostd::shared_ptr + startSpan(std::string_view name, trace_api::SpanKind kind) override + { + auto tracer = getTracer("rippled"); + trace_api::StartSpanOptions opts; + opts.kind = kind; + return tracer->StartSpan(std::string(name), opts); + } + + opentelemetry::nostd::shared_ptr + startSpan( + std::string_view name, + opentelemetry::context::Context const& parentContext, + trace_api::SpanKind kind) override + { + auto tracer = getTracer("rippled"); + trace_api::StartSpanOptions opts; + opts.kind = kind; + opts.parent = parentContext; + return tracer->StartSpan(std::string(name), opts); + } +}; + +} // namespace + +std::unique_ptr +make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal) +{ + if (setup.enabled) + return std::make_unique(setup, journal); + return std::make_unique(setup); +} + +} // namespace telemetry +} // namespace xrpl + +#endif // XRPL_ENABLE_TELEMETRY diff --git a/src/libxrpl/telemetry/TelemetryConfig.cpp b/src/libxrpl/telemetry/TelemetryConfig.cpp new file mode 100644 index 0000000000..c5b25023e4 --- /dev/null +++ b/src/libxrpl/telemetry/TelemetryConfig.cpp @@ -0,0 +1,52 @@ +/** Parser for the [telemetry] section of xrpld.cfg. + + Reads configuration values from the config file and populates a + Telemetry::Setup struct. All options have sensible defaults so the + section can be minimal or omitted entirely. + + See cfg/xrpld-example.cfg for the full list of available options. +*/ + +#include + +namespace xrpl { +namespace telemetry { + +Telemetry::Setup +setup_Telemetry( + Section const& section, + std::string const& nodePublicKey, + std::string const& version) +{ + Telemetry::Setup setup; + + setup.enabled = section.value_or("enabled", 0) != 0; + setup.serviceName = section.value_or("service_name", "rippled"); + setup.serviceVersion = version; + setup.serviceInstanceId = section.value_or("service_instance_id", nodePublicKey); + + setup.exporterType = section.value_or("exporter", "otlp_http"); + setup.exporterEndpoint = + section.value_or("endpoint", "http://localhost:4318/v1/traces"); + + setup.useTls = section.value_or("use_tls", 0) != 0; + setup.tlsCertPath = section.value_or("tls_ca_cert", ""); + + setup.samplingRatio = section.value_or("sampling_ratio", 1.0); + + setup.batchSize = section.value_or("batch_size", 512u); + setup.batchDelay = + std::chrono::milliseconds{section.value_or("batch_delay_ms", 5000u)}; + setup.maxQueueSize = section.value_or("max_queue_size", 2048u); + + setup.traceTransactions = section.value_or("trace_transactions", 1) != 0; + setup.traceConsensus = section.value_or("trace_consensus", 1) != 0; + setup.traceRpc = section.value_or("trace_rpc", 1) != 0; + setup.tracePeer = section.value_or("trace_peer", 0) != 0; + setup.traceLedger = section.value_or("trace_ledger", 1) != 0; + + return setup; +} + +} // namespace telemetry +} // namespace xrpl diff --git a/src/xrpld/app/main/Application.cpp b/src/xrpld/app/main/Application.cpp index 3e3d87dcd5..c9cd91a478 100644 --- a/src/xrpld/app/main/Application.cpp +++ b/src/xrpld/app/main/Application.cpp @@ -31,7 +31,6 @@ #include #include -#include #include #include #include @@ -52,6 +51,7 @@ #include #include #include +#include #include #include @@ -147,6 +147,7 @@ public: beast::Journal m_journal; std::unique_ptr perfLog_; + std::unique_ptr telemetry_; Application::MutexType m_masterMutex; // Required by the SHAMapStore @@ -258,6 +259,14 @@ public: logs_->journal("PerfLog"), [this] { signalStop("PerfLog"); })) + , telemetry_( + telemetry::make_Telemetry( + telemetry::setup_Telemetry( + config_->section("telemetry"), + "", // nodePublicKey not yet available at this point + BuildInfo::getVersionString()), + logs_->journal("Telemetry"))) + , m_txMaster(*this) , m_collectorManager( @@ -618,6 +627,12 @@ public: return *perfLog_; } + telemetry::Telemetry& + getTelemetry() override + { + return *telemetry_; + } + NodeCache& getTempNodeCache() override { @@ -1054,8 +1069,6 @@ public: << "; size after: " << cachedSLEs_.size(); } - mallocTrim("doSweep", m_journal); - // Set timer to do another sweep later. setSweepTimer(); } @@ -1472,6 +1485,7 @@ ApplicationImp::start(bool withTimers) ledgerCleaner_->start(); perfLog_->start(); + telemetry_->start(); } void @@ -1562,6 +1576,7 @@ ApplicationImp::run() ledgerCleaner_->stop(); m_nodeStore->stop(); perfLog_->stop(); + telemetry_->stop(); JLOG(m_journal.info()) << "Done."; }