mirror of
https://github.com/XRPLF/rippled.git
synced 2026-02-27 09:12:33 +00:00
Compare commits
13 Commits
pratik/ote
...
pratik/Ope
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
96126d183f | ||
|
|
acea62523d | ||
|
|
07e82d3d1c | ||
|
|
8fb2f8fc1a | ||
|
|
4c298e3c4f | ||
|
|
37331f39b5 | ||
|
|
75332cf366 | ||
|
|
9bb7ea04cb | ||
|
|
441ccd514c | ||
|
|
6e8f0624ce | ||
|
|
bff83a2b92 | ||
|
|
6207f8dd42 | ||
|
|
94797a4258 |
@@ -32,6 +32,8 @@ libxrpl.server > xrpl.server
|
||||
libxrpl.shamap > xrpl.basics
|
||||
libxrpl.shamap > xrpl.protocol
|
||||
libxrpl.shamap > xrpl.shamap
|
||||
libxrpl.telemetry > xrpl.basics
|
||||
libxrpl.telemetry > xrpl.telemetry
|
||||
libxrpl.tx > xrpl.basics
|
||||
libxrpl.tx > xrpl.conditions
|
||||
libxrpl.tx > xrpl.core
|
||||
@@ -206,6 +208,7 @@ xrpl.server > xrpl.shamap
|
||||
xrpl.shamap > xrpl.basics
|
||||
xrpl.shamap > xrpl.nodestore
|
||||
xrpl.shamap > xrpl.protocol
|
||||
xrpl.telemetry > xrpl.basics
|
||||
xrpl.tx > xrpl.basics
|
||||
xrpl.tx > xrpl.core
|
||||
xrpl.tx > xrpl.ledger
|
||||
@@ -224,6 +227,7 @@ xrpld.app > xrpl.rdb
|
||||
xrpld.app > xrpl.resource
|
||||
xrpld.app > xrpl.server
|
||||
xrpld.app > xrpl.shamap
|
||||
xrpld.app > xrpl.telemetry
|
||||
xrpld.app > xrpl.tx
|
||||
xrpld.consensus > xrpl.basics
|
||||
xrpld.consensus > xrpl.json
|
||||
@@ -255,6 +259,7 @@ xrpld.perflog > xrpl.json
|
||||
xrpld.rpc > xrpl.basics
|
||||
xrpld.rpc > xrpl.core
|
||||
xrpld.rpc > xrpld.core
|
||||
xrpld.rpc > xrpld.telemetry
|
||||
xrpld.rpc > xrpl.json
|
||||
xrpld.rpc > xrpl.ledger
|
||||
xrpld.rpc > xrpl.net
|
||||
@@ -265,3 +270,4 @@ xrpld.rpc > xrpl.resource
|
||||
xrpld.rpc > xrpl.server
|
||||
xrpld.rpc > xrpl.tx
|
||||
xrpld.shamap > xrpl.shamap
|
||||
xrpld.telemetry > xrpl.telemetry
|
||||
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
steps:
|
||||
- name: Cleanup workspace (macOS and Windows)
|
||||
if: ${{ runner.os == 'macOS' || runner.os == 'Windows' }}
|
||||
uses: XRPLF/actions/cleanup-workspace@c7d9ce5ebb03c752a354889ecd870cadfc2b1cd4
|
||||
uses: XRPLF/actions/cleanup-workspace@cf0433aa74563aead044a1e395610c96d65a37cf
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
2
.github/workflows/upload-conan-deps.yml
vendored
2
.github/workflows/upload-conan-deps.yml
vendored
@@ -64,7 +64,7 @@ jobs:
|
||||
steps:
|
||||
- name: Cleanup workspace (macOS and Windows)
|
||||
if: ${{ runner.os == 'macOS' || runner.os == 'Windows' }}
|
||||
uses: XRPLF/actions/cleanup-workspace@c7d9ce5ebb03c752a354889ecd870cadfc2b1cd4
|
||||
uses: XRPLF/actions/cleanup-workspace@cf0433aa74563aead044a1e395610c96d65a37cf
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
@@ -126,6 +126,18 @@ if (rocksdb)
|
||||
target_link_libraries(xrpl_libs INTERFACE RocksDB::rocksdb)
|
||||
endif ()
|
||||
|
||||
# OpenTelemetry distributed tracing (optional).
|
||||
# When ON, links against opentelemetry-cpp and defines XRPL_ENABLE_TELEMETRY
|
||||
# so that tracing macros in TracingInstrumentation.h are compiled in.
|
||||
# When OFF (default), all tracing code compiles to no-ops with zero overhead.
|
||||
# Enable via: conan install -o telemetry=True, or cmake -Dtelemetry=ON.
|
||||
option(telemetry "Enable OpenTelemetry tracing" OFF)
|
||||
if (telemetry)
|
||||
find_package(opentelemetry-cpp CONFIG REQUIRED)
|
||||
add_compile_definitions(XRPL_ENABLE_TELEMETRY)
|
||||
message(STATUS "OpenTelemetry tracing enabled")
|
||||
endif ()
|
||||
|
||||
# Work around changes to Conan recipe for now.
|
||||
if (TARGET nudb::core)
|
||||
set(nudb nudb::core)
|
||||
|
||||
@@ -1529,3 +1529,46 @@ validators.txt
|
||||
# set to ssl_verify to 0.
|
||||
[ssl_verify]
|
||||
1
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# 11. Telemetry (OpenTelemetry Tracing)
|
||||
#
|
||||
#-------------------------------------------------------------------------------
|
||||
#
|
||||
# Enables distributed tracing via OpenTelemetry. Requires building with
|
||||
# -DXRPL_ENABLE_TELEMETRY=ON (telemetry Conan option).
|
||||
#
|
||||
# [telemetry]
|
||||
#
|
||||
# enabled=0
|
||||
#
|
||||
# Enable or disable telemetry at runtime. Default: 0 (disabled).
|
||||
#
|
||||
# endpoint=http://localhost:4318/v1/traces
|
||||
#
|
||||
# The OpenTelemetry Collector endpoint (OTLP/HTTP). Default: http://localhost:4318/v1/traces.
|
||||
#
|
||||
# exporter=otlp_http
|
||||
#
|
||||
# Exporter type: otlp_http. Default: otlp_http.
|
||||
#
|
||||
# sampling_ratio=1.0
|
||||
#
|
||||
# Fraction of traces to sample (0.0 to 1.0). Default: 1.0 (all traces).
|
||||
#
|
||||
# trace_rpc=1
|
||||
#
|
||||
# Enable RPC request tracing. Default: 1.
|
||||
#
|
||||
# trace_transactions=1
|
||||
#
|
||||
# Enable transaction lifecycle tracing. Default: 1.
|
||||
#
|
||||
# trace_consensus=1
|
||||
#
|
||||
# Enable consensus round tracing. Default: 1.
|
||||
#
|
||||
# trace_peer=0
|
||||
#
|
||||
# Enable peer message tracing (high volume). Default: 0.
|
||||
#
|
||||
|
||||
@@ -119,6 +119,17 @@ target_link_libraries(
|
||||
add_module(xrpl tx)
|
||||
target_link_libraries(xrpl.libxrpl.tx PUBLIC xrpl.libxrpl.ledger)
|
||||
|
||||
# Telemetry module — OpenTelemetry distributed tracing support.
|
||||
# Sources: include/xrpl/telemetry/ (headers), src/libxrpl/telemetry/ (impl).
|
||||
# When telemetry=ON, links the Conan-provided umbrella target
|
||||
# opentelemetry-cpp::opentelemetry-cpp (individual component targets like
|
||||
# ::api, ::sdk are not available in the Conan package).
|
||||
add_module(xrpl telemetry)
|
||||
target_link_libraries(xrpl.libxrpl.telemetry PUBLIC xrpl.libxrpl.basics xrpl.libxrpl.beast)
|
||||
if (telemetry)
|
||||
target_link_libraries(xrpl.libxrpl.telemetry PUBLIC opentelemetry-cpp::opentelemetry-cpp)
|
||||
endif ()
|
||||
|
||||
add_library(xrpl.libxrpl)
|
||||
set_target_properties(xrpl.libxrpl PROPERTIES OUTPUT_NAME xrpl)
|
||||
|
||||
@@ -144,6 +155,7 @@ target_link_modules(
|
||||
resource
|
||||
server
|
||||
shamap
|
||||
telemetry
|
||||
tx)
|
||||
|
||||
# All headers in libxrpl are in modules.
|
||||
|
||||
@@ -32,6 +32,7 @@ install(TARGETS common
|
||||
xrpl.libxrpl.resource
|
||||
xrpl.libxrpl.server
|
||||
xrpl.libxrpl.shamap
|
||||
xrpl.libxrpl.telemetry
|
||||
xrpl.libxrpl.tx
|
||||
antithesis-sdk-cpp
|
||||
EXPORT XrplExports
|
||||
|
||||
11
conan.lock
11
conan.lock
@@ -10,10 +10,13 @@
|
||||
"rocksdb/10.5.1#4a197eca381a3e5ae8adf8cffa5aacd0%1765850186.86",
|
||||
"re2/20230301#ca3b241baec15bd31ea9187150e0b333%1765850148.103",
|
||||
"protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038",
|
||||
"openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1769599205.414",
|
||||
"opentelemetry-cpp/1.18.0#efd9851e173f8a13b9c7d35232de8cf1%1750409186.472",
|
||||
"openssl/3.5.5#05a4ac5b7323f7a329b2db1391d9941f%1770229825.601",
|
||||
"nudb/2.0.9#0432758a24204da08fee953ec9ea03cb%1769436073.32",
|
||||
"nlohmann_json/3.11.3#45828be26eb619a2e04ca517bb7b828d%1701220705.259",
|
||||
"lz4/1.10.0#59fc63cac7f10fbe8e05c7e62c2f3504%1765850143.914",
|
||||
"libiconv/1.17#1e65319e945f2d31941a9d28cc13c058%1765842973.492",
|
||||
"libcurl/8.18.0#364bc3755cb9ef84ed9a7ae9c7efc1c1%1770984390.024",
|
||||
"libbacktrace/cci.20210118#a7691bfccd8caaf66309df196790a5a1%1765842973.03",
|
||||
"libarchive/3.8.1#ffee18995c706e02bf96e7a2f7042e0d%1765850144.736",
|
||||
"jemalloc/5.3.0#e951da9cf599e956cebc117880d2d9f8%1729241615.244",
|
||||
@@ -30,9 +33,15 @@
|
||||
"zlib/1.3.1#b8bc2603263cf7eccbd6e17e66b0ed76%1765850150.075",
|
||||
"strawberryperl/5.32.1.1#707032463aa0620fa17ec0d887f5fe41%1765850165.196",
|
||||
"protobuf/6.32.1#f481fd276fc23a33b85a3ed1e898b693%1765850161.038",
|
||||
"pkgconf/2.5.1#93c2051284cba1279494a43a4fcfeae2%1757684701.089",
|
||||
"opentelemetry-proto/1.4.0#4096a3b05916675ef9628f3ffd571f51%1732731336.11",
|
||||
"ninja/1.13.2#c8c5dc2a52ed6e4e42a66d75b4717ceb%1764096931.974",
|
||||
"nasm/2.16.01#31e26f2ee3c4346ecd347911bd126904%1765850144.707",
|
||||
"msys2/cci.latest#eea83308ad7e9023f7318c60d5a9e6cb%1770199879.083",
|
||||
"meson/1.10.0#60786758ea978964c24525de19603cf4%1768294926.103",
|
||||
"m4/1.4.19#70dc8bbb33e981d119d2acc0175cf381%1763158052.846",
|
||||
"libtool/2.4.7#14e7739cc128bc1623d2ed318008e47e%1755679003.847",
|
||||
"gnu-config/cci.20210814#466e9d4d7779e1c142443f7ea44b4284%1762363589.329",
|
||||
"cmake/4.2.0#ae0a44f44a1ef9ab68fd4b3e9a1f8671%1765850153.937",
|
||||
"cmake/3.31.10#313d16a1aa16bbdb2ca0792467214b76%1765850153.479",
|
||||
"b2/5.3.3#107c15377719889654eb9a162a673975%1765850144.355",
|
||||
|
||||
@@ -22,6 +22,7 @@ class Xrpl(ConanFile):
|
||||
"rocksdb": [True, False],
|
||||
"shared": [True, False],
|
||||
"static": [True, False],
|
||||
"telemetry": [True, False],
|
||||
"tests": [True, False],
|
||||
"unity": [True, False],
|
||||
"xrpld": [True, False],
|
||||
@@ -54,6 +55,7 @@ class Xrpl(ConanFile):
|
||||
"rocksdb": True,
|
||||
"shared": False,
|
||||
"static": True,
|
||||
"telemetry": True,
|
||||
"tests": False,
|
||||
"unity": False,
|
||||
"xrpld": False,
|
||||
@@ -140,6 +142,10 @@ class Xrpl(ConanFile):
|
||||
self.requires("jemalloc/5.3.0")
|
||||
if self.options.rocksdb:
|
||||
self.requires("rocksdb/10.5.1")
|
||||
# OpenTelemetry C++ SDK for distributed tracing (optional).
|
||||
# Provides OTLP/HTTP exporter, batch span processor, and trace API.
|
||||
if self.options.telemetry:
|
||||
self.requires("opentelemetry-cpp/1.18.0")
|
||||
self.requires("xxhash/0.8.3", **transitive_headers_opt)
|
||||
|
||||
exports_sources = (
|
||||
@@ -168,6 +174,7 @@ class Xrpl(ConanFile):
|
||||
tc.variables["rocksdb"] = self.options.rocksdb
|
||||
tc.variables["BUILD_SHARED_LIBS"] = self.options.shared
|
||||
tc.variables["static"] = self.options.static
|
||||
tc.variables["telemetry"] = self.options.telemetry
|
||||
tc.variables["unity"] = self.options.unity
|
||||
tc.variables["xrpld"] = self.options.xrpld
|
||||
tc.generate()
|
||||
@@ -220,3 +227,5 @@ class Xrpl(ConanFile):
|
||||
]
|
||||
if self.options.rocksdb:
|
||||
libxrpl.requires.append("rocksdb::librocksdb")
|
||||
if self.options.telemetry:
|
||||
libxrpl.requires.append("opentelemetry-cpp::opentelemetry-cpp")
|
||||
|
||||
60
docker/telemetry/docker-compose.yml
Normal file
60
docker/telemetry/docker-compose.yml
Normal file
@@ -0,0 +1,60 @@
|
||||
# Docker Compose stack for rippled OpenTelemetry observability.
|
||||
#
|
||||
# Provides three services for local development:
|
||||
# - otel-collector: receives OTLP traces from rippled, batches and
|
||||
# forwards them to Jaeger. Listens on ports 4317 (gRPC) and 4318 (HTTP).
|
||||
# - jaeger: all-in-one tracing backend with UI on port 16686.
|
||||
# - grafana: dashboards on port 3000, pre-configured with Jaeger datasource.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker/telemetry/docker-compose.yml up -d
|
||||
#
|
||||
# Configure rippled to export traces by adding to xrpld.cfg:
|
||||
# [telemetry]
|
||||
# enabled=1
|
||||
# endpoint=http://localhost:4318/v1/traces
|
||||
|
||||
version: "3.8"
|
||||
|
||||
services:
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:latest
|
||||
command: ["--config=/etc/otel-collector-config.yaml"]
|
||||
ports:
|
||||
- "4317:4317" # OTLP gRPC
|
||||
- "4318:4318" # OTLP HTTP
|
||||
- "13133:13133" # Health check
|
||||
volumes:
|
||||
- ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro
|
||||
depends_on:
|
||||
- jaeger
|
||||
networks:
|
||||
- rippled-telemetry
|
||||
|
||||
jaeger:
|
||||
image: jaegertracing/all-in-one:latest
|
||||
environment:
|
||||
- COLLECTOR_OTLP_ENABLED=true
|
||||
ports:
|
||||
- "16686:16686" # Jaeger UI
|
||||
- "14250:14250" # gRPC
|
||||
networks:
|
||||
- rippled-telemetry
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:latest
|
||||
environment:
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=true
|
||||
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
|
||||
ports:
|
||||
- "3000:3000"
|
||||
volumes:
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
depends_on:
|
||||
- jaeger
|
||||
networks:
|
||||
- rippled-telemetry
|
||||
|
||||
networks:
|
||||
rippled-telemetry:
|
||||
driver: bridge
|
||||
@@ -0,0 +1,12 @@
|
||||
# Grafana datasource provisioning for the rippled telemetry stack.
|
||||
# Auto-configures Jaeger as a trace data source on Grafana startup.
|
||||
# Access Grafana at http://localhost:3000, then use Explore -> Jaeger
|
||||
# to browse rippled traces.
|
||||
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Jaeger
|
||||
type: jaeger
|
||||
access: proxy
|
||||
url: http://jaeger:16686
|
||||
33
docker/telemetry/otel-collector-config.yaml
Normal file
33
docker/telemetry/otel-collector-config.yaml
Normal file
@@ -0,0 +1,33 @@
|
||||
# OpenTelemetry Collector configuration for rippled development.
|
||||
#
|
||||
# Pipeline: OTLP receiver -> batch processor -> debug exporter + Jaeger.
|
||||
# rippled sends traces via OTLP/HTTP to port 4318. The collector batches
|
||||
# them and forwards to Jaeger via OTLP/gRPC on the Docker network.
|
||||
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 1s
|
||||
send_batch_size: 100
|
||||
|
||||
exporters:
|
||||
debug:
|
||||
verbosity: detailed
|
||||
otlp/jaeger:
|
||||
endpoint: jaeger:4317
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [batch]
|
||||
exporters: [debug, otlp/jaeger]
|
||||
278
docs/build/telemetry.md
vendored
Normal file
278
docs/build/telemetry.md
vendored
Normal file
@@ -0,0 +1,278 @@
|
||||
# OpenTelemetry Tracing for Rippled
|
||||
|
||||
This document explains how to build rippled with OpenTelemetry distributed tracing support, configure the runtime telemetry options, and set up the observability backend to view traces.
|
||||
|
||||
- [OpenTelemetry Tracing for Rippled](#opentelemetry-tracing-for-rippled)
|
||||
- [Overview](#overview)
|
||||
- [Building with Telemetry](#building-with-telemetry)
|
||||
- [Summary](#summary)
|
||||
- [Build steps](#build-steps)
|
||||
- [Install dependencies](#install-dependencies)
|
||||
- [Call CMake](#call-cmake)
|
||||
- [Build](#build)
|
||||
- [Building without telemetry](#building-without-telemetry)
|
||||
- [Runtime Configuration](#runtime-configuration)
|
||||
- [Configuration options](#configuration-options)
|
||||
- [Observability Stack](#observability-stack)
|
||||
- [Start the stack](#start-the-stack)
|
||||
- [Verify the stack](#verify-the-stack)
|
||||
- [View traces in Jaeger](#view-traces-in-jaeger)
|
||||
- [Running Tests](#running-tests)
|
||||
- [Troubleshooting](#troubleshooting)
|
||||
- [No traces appear in Jaeger](#no-traces-appear-in-jaeger)
|
||||
- [Conan lockfile error](#conan-lockfile-error)
|
||||
- [CMake target not found](#cmake-target-not-found)
|
||||
- [Architecture](#architecture)
|
||||
- [Key files](#key-files)
|
||||
- [Conditional compilation](#conditional-compilation)
|
||||
|
||||
## Overview
|
||||
|
||||
Rippled supports optional [OpenTelemetry](https://opentelemetry.io/) distributed tracing.
|
||||
When enabled, it instruments RPC requests with trace spans that are exported via
|
||||
OTLP/HTTP to an OpenTelemetry Collector, which forwards them to a tracing backend
|
||||
such as Jaeger.
|
||||
|
||||
Telemetry is **off by default** at both compile time and runtime:
|
||||
|
||||
- **Compile time**: The Conan option `telemetry` and CMake option `telemetry` must be set to `True`/`ON`.
|
||||
When disabled, all tracing macros compile to `((void)0)` with zero overhead.
|
||||
- **Runtime**: The `[telemetry]` config section must set `enabled=1`.
|
||||
When disabled at runtime, a no-op implementation is used.
|
||||
|
||||
## Building with Telemetry
|
||||
|
||||
### Summary
|
||||
|
||||
Follow the same instructions as mentioned in [BUILD.md](../../BUILD.md) but with the following changes:
|
||||
|
||||
1. Pass `-o telemetry=True` to `conan install` to pull the `opentelemetry-cpp` dependency.
|
||||
2. CMake will automatically pick up `telemetry=ON` from the Conan-generated toolchain.
|
||||
3. Build as usual.
|
||||
|
||||
---
|
||||
|
||||
### Build steps
|
||||
|
||||
```bash
|
||||
cd /path/to/rippled
|
||||
rm -rf .build
|
||||
mkdir .build
|
||||
cd .build
|
||||
```
|
||||
|
||||
#### Install dependencies
|
||||
|
||||
The `telemetry` option adds `opentelemetry-cpp/1.18.0` as a dependency.
|
||||
If the Conan lockfile does not yet include this package, bypass it with `--lockfile=""`.
|
||||
|
||||
```bash
|
||||
conan install .. \
|
||||
--output-folder . \
|
||||
--build missing \
|
||||
--settings build_type=Debug \
|
||||
-o telemetry=True \
|
||||
-o tests=True \
|
||||
-o xrpld=True \
|
||||
--lockfile=""
|
||||
```
|
||||
|
||||
> **Note**: The first build with telemetry may take longer as `opentelemetry-cpp`
|
||||
> and its transitive dependencies are compiled from source.
|
||||
|
||||
#### Call CMake
|
||||
|
||||
The Conan-generated toolchain file sets `telemetry=ON` automatically.
|
||||
No additional CMake flags are needed beyond the standard ones.
|
||||
|
||||
```bash
|
||||
cmake .. -G Ninja \
|
||||
-DCMAKE_TOOLCHAIN_FILE:FILEPATH=build/generators/conan_toolchain.cmake \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-Dtests=ON -Dxrpld=ON
|
||||
```
|
||||
|
||||
You should see in the CMake output:
|
||||
|
||||
```
|
||||
-- OpenTelemetry tracing enabled
|
||||
```
|
||||
|
||||
#### Build
|
||||
|
||||
```bash
|
||||
cmake --build . --parallel $(nproc)
|
||||
```
|
||||
|
||||
### Building without telemetry
|
||||
|
||||
Omit the `-o telemetry=True` option (or pass `-o telemetry=False`).
|
||||
The `opentelemetry-cpp` dependency will not be downloaded,
|
||||
the `XRPL_ENABLE_TELEMETRY` preprocessor define will not be set,
|
||||
and all tracing macros will compile to no-ops.
|
||||
The resulting binary is identical to one built before telemetry support was added.
|
||||
|
||||
## Runtime Configuration
|
||||
|
||||
Add a `[telemetry]` section to your `xrpld.cfg` file:
|
||||
|
||||
```ini
|
||||
[telemetry]
|
||||
enabled=1
|
||||
service_name=rippled
|
||||
endpoint=http://localhost:4318/v1/traces
|
||||
sampling_ratio=1.0
|
||||
trace_rpc=1
|
||||
trace_transactions=1
|
||||
trace_consensus=1
|
||||
trace_peer=0
|
||||
```
|
||||
|
||||
### Configuration options
|
||||
|
||||
| Option | Type | Default | Description |
|
||||
| --------------------- | ------ | --------------------------------- | -------------------------------------------------- |
|
||||
| `enabled` | int | `0` | Enable (`1`) or disable (`0`) telemetry at runtime |
|
||||
| `service_name` | string | `rippled` | Service name reported in traces |
|
||||
| `service_instance_id` | string | node public key | Unique instance identifier |
|
||||
| `exporter` | string | `otlp_http` | Exporter type |
|
||||
| `endpoint` | string | `http://localhost:4318/v1/traces` | OTLP/HTTP collector endpoint |
|
||||
| `use_tls` | int | `0` | Enable TLS for the exporter connection |
|
||||
| `tls_ca_cert` | string | (empty) | Path to CA certificate for TLS |
|
||||
| `sampling_ratio` | double | `1.0` | Fraction of traces to sample (`0.0` to `1.0`) |
|
||||
| `batch_size` | uint32 | `512` | Maximum spans per export batch |
|
||||
| `batch_delay_ms` | uint32 | `5000` | Maximum delay (ms) before flushing a batch |
|
||||
| `max_queue_size` | uint32 | `2048` | Maximum spans queued in memory |
|
||||
| `trace_rpc` | int | `1` | Enable RPC request tracing |
|
||||
| `trace_transactions` | int | `1` | Enable transaction lifecycle tracing |
|
||||
| `trace_consensus` | int | `1` | Enable consensus round tracing |
|
||||
| `trace_peer` | int | `0` | Enable peer message tracing (high volume) |
|
||||
| `trace_ledger` | int | `1` | Enable ledger close tracing |
|
||||
|
||||
## Observability Stack
|
||||
|
||||
A Docker Compose stack is provided in `docker/telemetry/` with three services:
|
||||
|
||||
| Service | Port | Purpose |
|
||||
| ------------------ | ---------------------------------------------- | ---------------------------------------------------- |
|
||||
| **OTel Collector** | `4317` (gRPC), `4318` (HTTP), `13133` (health) | Receives OTLP spans, batches, and forwards to Jaeger |
|
||||
| **Jaeger** | `16686` (UI) | Trace storage and visualization |
|
||||
| **Grafana** | `3000` | Dashboards (Jaeger pre-configured as datasource) |
|
||||
|
||||
### Start the stack
|
||||
|
||||
```bash
|
||||
docker compose -f docker/telemetry/docker-compose.yml up -d
|
||||
```
|
||||
|
||||
### Verify the stack
|
||||
|
||||
```bash
|
||||
# Collector health
|
||||
curl http://localhost:13133
|
||||
|
||||
# Jaeger UI
|
||||
open http://localhost:16686
|
||||
|
||||
# Grafana
|
||||
open http://localhost:3000
|
||||
```
|
||||
|
||||
### View traces in Jaeger
|
||||
|
||||
1. Open `http://localhost:16686` in a browser.
|
||||
2. Select the service name (e.g. `rippled`) from the **Service** dropdown.
|
||||
3. Click **Find Traces**.
|
||||
4. Click into any trace to see the span tree and attributes.
|
||||
|
||||
Traced RPC operations produce a span hierarchy like:
|
||||
|
||||
```
|
||||
rpc.request
|
||||
└── rpc.command.server_info (xrpl.rpc.command=server_info, xrpl.rpc.status=success)
|
||||
```
|
||||
|
||||
Each span includes attributes:
|
||||
|
||||
- `xrpl.rpc.command` — the RPC method name
|
||||
- `xrpl.rpc.version` — API version
|
||||
- `xrpl.rpc.role` — `admin` or `user`
|
||||
- `xrpl.rpc.status` — `success` or `error`
|
||||
|
||||
## Running Tests
|
||||
|
||||
Unit tests run with the telemetry-enabled build regardless of whether the
|
||||
observability stack is running. When no collector is available, the exporter
|
||||
silently drops spans with no impact on test results.
|
||||
|
||||
```bash
|
||||
# Run all RPC tests
|
||||
./xrpld --unittest=RPCCall,ServerInfo,AccountTx,LedgerRPC,Transaction --unittest-jobs $(nproc)
|
||||
|
||||
# Run the full test suite
|
||||
./xrpld --unittest --unittest-jobs $(nproc)
|
||||
```
|
||||
|
||||
To generate traces during manual testing, start rippled in standalone mode:
|
||||
|
||||
```bash
|
||||
./xrpld --conf /path/to/xrpld.cfg --standalone --start
|
||||
```
|
||||
|
||||
Then send RPC requests:
|
||||
|
||||
```bash
|
||||
curl -s -X POST http://127.0.0.1:5005/ \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{"method":"server_info","params":[{}]}'
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### No traces appear in Jaeger
|
||||
|
||||
1. Confirm the OTel Collector is running: `docker compose -f docker/telemetry/docker-compose.yml ps`
|
||||
2. Check collector logs for errors: `docker compose -f docker/telemetry/docker-compose.yml logs otel-collector`
|
||||
3. Confirm `[telemetry] enabled=1` is set in the rippled config.
|
||||
4. Confirm `endpoint` points to the correct collector address (`http://localhost:4318/v1/traces`).
|
||||
5. Wait for the batch delay to elapse (default `5000` ms) before checking Jaeger.
|
||||
|
||||
### Conan lockfile error
|
||||
|
||||
If you see `ERROR: Requirement 'opentelemetry-cpp/1.18.0' not in lockfile 'requires'`,
|
||||
the lockfile was generated without the telemetry dependency.
|
||||
Pass `--lockfile=""` to bypass the lockfile, or regenerate it with telemetry enabled.
|
||||
|
||||
### CMake target not found
|
||||
|
||||
If CMake reports that `opentelemetry-cpp` targets are not found,
|
||||
ensure you ran `conan install` with `-o telemetry=True` and that the
|
||||
Conan-generated toolchain file is being used.
|
||||
The Conan package provides a single umbrella target
|
||||
`opentelemetry-cpp::opentelemetry-cpp` (not individual component targets).
|
||||
|
||||
## Architecture
|
||||
|
||||
### Key files
|
||||
|
||||
| File | Purpose |
|
||||
| ---------------------------------------------- | ----------------------------------------------------------- |
|
||||
| `include/xrpl/telemetry/Telemetry.h` | Abstract telemetry interface and `Setup` struct |
|
||||
| `include/xrpl/telemetry/SpanGuard.h` | RAII span guard (activates scope, ends span on destruction) |
|
||||
| `src/libxrpl/telemetry/Telemetry.cpp` | OTel-backed implementation (`TelemetryImpl`) |
|
||||
| `src/libxrpl/telemetry/TelemetryConfig.cpp` | Config parser (`setup_Telemetry()`) |
|
||||
| `src/libxrpl/telemetry/NullTelemetry.cpp` | No-op implementation (used when disabled) |
|
||||
| `src/xrpld/telemetry/TracingInstrumentation.h` | Convenience macros (`XRPL_TRACE_RPC`, etc.) |
|
||||
| `src/xrpld/rpc/detail/ServerHandler.cpp` | RPC entry point instrumentation |
|
||||
| `src/xrpld/rpc/detail/RPCHandler.cpp` | Per-command instrumentation |
|
||||
| `docker/telemetry/docker-compose.yml` | Observability stack (Collector + Jaeger + Grafana) |
|
||||
| `docker/telemetry/otel-collector-config.yaml` | OTel Collector pipeline configuration |
|
||||
|
||||
### Conditional compilation
|
||||
|
||||
All OpenTelemetry SDK headers are guarded behind `#ifdef XRPL_ENABLE_TELEMETRY`.
|
||||
The instrumentation macros in `TracingInstrumentation.h` compile to `((void)0)` when
|
||||
the define is absent.
|
||||
At runtime, if `enabled=0` is set in config (or the section is omitted), a
|
||||
`NullTelemetry` implementation is used that returns no-op spans.
|
||||
This two-layer approach ensures zero overhead when telemetry is not wanted.
|
||||
@@ -1,73 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <xrpl/beast/utility/Journal.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <string_view>
|
||||
|
||||
namespace xrpl {
|
||||
|
||||
// cSpell:ignore ptmalloc
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Allocator interaction note:
|
||||
// - This facility invokes glibc's malloc_trim(0) on Linux/glibc to request that
|
||||
// ptmalloc return free heap pages to the OS.
|
||||
// - If an alternative allocator (e.g. jemalloc or tcmalloc) is linked or
|
||||
// preloaded (LD_PRELOAD), calling glibc's malloc_trim typically has no effect
|
||||
// on the *active* heap. The call is harmless but may not reclaim memory
|
||||
// because those allocators manage their own arenas.
|
||||
// - Only glibc sbrk/arena space is eligible for trimming; large mmap-backed
|
||||
// allocations are usually returned to the OS on free regardless of trimming.
|
||||
// - Call at known reclamation points (e.g., after cache sweeps / online delete)
|
||||
// and consider rate limiting to avoid churn.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
struct MallocTrimReport
|
||||
{
|
||||
bool supported{false};
|
||||
int trimResult{-1};
|
||||
std::int64_t rssBeforeKB{-1};
|
||||
std::int64_t rssAfterKB{-1};
|
||||
std::chrono::microseconds durationUs{-1};
|
||||
std::int64_t minfltDelta{-1};
|
||||
std::int64_t majfltDelta{-1};
|
||||
|
||||
[[nodiscard]] std::int64_t
|
||||
deltaKB() const noexcept
|
||||
{
|
||||
if (rssBeforeKB < 0 || rssAfterKB < 0)
|
||||
return 0;
|
||||
return rssAfterKB - rssBeforeKB;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Attempt to return freed memory to the operating system.
|
||||
*
|
||||
* On Linux with glibc malloc, this issues ::malloc_trim(0), which may release
|
||||
* free space from ptmalloc arenas back to the kernel. On other platforms, or if
|
||||
* a different allocator is in use, this function is a no-op and the report will
|
||||
* indicate that trimming is unsupported or had no effect.
|
||||
*
|
||||
* @param tag Identifier for logging/debugging purposes.
|
||||
* @param journal Journal for diagnostic logging.
|
||||
* @return Report containing before/after metrics and the trim result.
|
||||
*
|
||||
* @note If an alternative allocator (jemalloc/tcmalloc) is linked or preloaded,
|
||||
* calling glibc's malloc_trim may have no effect on the active heap. The
|
||||
* call is harmless but typically does not reclaim memory under those
|
||||
* allocators.
|
||||
*
|
||||
* @note Only memory served from glibc's sbrk/arena heaps is eligible for trim.
|
||||
* Large allocations satisfied via mmap are usually returned on free
|
||||
* independently of trimming.
|
||||
*
|
||||
* @note Intended for use after operations that free significant memory (e.g.,
|
||||
* cache sweeps, ledger cleanup, online delete). Consider rate limiting.
|
||||
*/
|
||||
MallocTrimReport
|
||||
mallocTrim(std::string_view tag, beast::Journal journal);
|
||||
|
||||
} // namespace xrpl
|
||||
@@ -19,6 +19,9 @@ class Manager;
|
||||
namespace perf {
|
||||
class PerfLog;
|
||||
}
|
||||
namespace telemetry {
|
||||
class Telemetry;
|
||||
}
|
||||
|
||||
// This is temporary until we migrate all code to use ServiceRegistry.
|
||||
class Application;
|
||||
@@ -205,6 +208,9 @@ public:
|
||||
virtual perf::PerfLog&
|
||||
getPerfLog() = 0;
|
||||
|
||||
virtual telemetry::Telemetry&
|
||||
getTelemetry() = 0;
|
||||
|
||||
// Configuration and state
|
||||
virtual bool
|
||||
isStopping() const = 0;
|
||||
|
||||
@@ -77,16 +77,16 @@ public:
|
||||
If the object is not found or an error is encountered, the
|
||||
result will indicate the condition.
|
||||
@note This will be called concurrently.
|
||||
@param hash The hash of the object.
|
||||
@param key A pointer to the key data.
|
||||
@param pObject [out] The created object if successful.
|
||||
@return The result of the operation.
|
||||
*/
|
||||
virtual Status
|
||||
fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pObject) = 0;
|
||||
fetch(void const* key, std::shared_ptr<NodeObject>* pObject) = 0;
|
||||
|
||||
/** Fetch a batch synchronously. */
|
||||
virtual std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
|
||||
fetchBatch(std::vector<uint256> const& hashes) = 0;
|
||||
fetchBatch(std::vector<uint256 const*> const& hashes) = 0;
|
||||
|
||||
/** Store a single object.
|
||||
Depending on the implementation this may happen immediately
|
||||
|
||||
@@ -15,10 +15,9 @@
|
||||
|
||||
// Add new amendments to the top of this list.
|
||||
// Keep it sorted in reverse chronological order.
|
||||
|
||||
XRPL_FIX (PermissionedDomainInvariant, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (ExpiredNFTokenOfferRemoval, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (BatchInnerSigs, Supported::no, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (BatchInnerSigs, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FEATURE(LendingProtocol, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FEATURE(PermissionDelegationV1_1, Supported::no, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (DirectoryLimit, Supported::yes, VoteBehavior::DefaultNo)
|
||||
@@ -32,7 +31,7 @@ XRPL_FEATURE(TokenEscrow, Supported::yes, VoteBehavior::DefaultNo
|
||||
XRPL_FIX (EnforceNFTokenTrustlineV2, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (AMMv1_3, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FEATURE(PermissionedDEX, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FEATURE(Batch, Supported::no, VoteBehavior::DefaultNo)
|
||||
XRPL_FEATURE(Batch, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FEATURE(SingleAssetVault, Supported::yes, VoteBehavior::DefaultNo)
|
||||
XRPL_FIX (PayChanCancelAfter, Supported::yes, VoteBehavior::DefaultNo)
|
||||
// Check flags in Credential transactions
|
||||
|
||||
155
include/xrpl/telemetry/SpanGuard.h
Normal file
155
include/xrpl/telemetry/SpanGuard.h
Normal file
@@ -0,0 +1,155 @@
|
||||
#pragma once
|
||||
|
||||
/** RAII guard for OpenTelemetry trace spans.
|
||||
|
||||
Wraps an OTel Span and Scope together. On construction, the span is
|
||||
activated on the current thread's context (via Scope). On destruction,
|
||||
the span is ended and the previous context is restored.
|
||||
|
||||
Used by the XRPL_TRACE_* macros in TracingInstrumentation.h. Can also
|
||||
be stored in std::optional for conditional tracing (move-constructible).
|
||||
|
||||
Only compiled when XRPL_ENABLE_TELEMETRY is defined.
|
||||
*/
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <opentelemetry/context/runtime_context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/scope.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
|
||||
#include <exception>
|
||||
#include <string_view>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
/** RAII wrapper that activates a span on construction and ends it on
|
||||
destruction. Non-copyable but move-constructible so it can be held
|
||||
in std::optional for conditional tracing.
|
||||
*/
|
||||
class SpanGuard
|
||||
{
|
||||
/** The OTel span being guarded. Set to nullptr after move. */
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span_;
|
||||
|
||||
/** Scope that activates span_ on the current thread's context stack. */
|
||||
opentelemetry::trace::Scope scope_;
|
||||
|
||||
public:
|
||||
/** Construct a guard that activates @p span on the current context.
|
||||
|
||||
@param span The span to guard. Ended in the destructor.
|
||||
*/
|
||||
explicit SpanGuard(opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span)
|
||||
: span_(std::move(span)), scope_(span_)
|
||||
{
|
||||
}
|
||||
|
||||
/** Non-copyable. Move-constructible to support std::optional.
|
||||
|
||||
The move constructor creates a new Scope from the transferred span,
|
||||
because Scope is not movable.
|
||||
*/
|
||||
SpanGuard(SpanGuard const&) = delete;
|
||||
SpanGuard&
|
||||
operator=(SpanGuard const&) = delete;
|
||||
SpanGuard(SpanGuard&& other) noexcept : span_(std::move(other.span_)), scope_(span_)
|
||||
{
|
||||
other.span_ = nullptr;
|
||||
}
|
||||
SpanGuard&
|
||||
operator=(SpanGuard&&) = delete;
|
||||
|
||||
~SpanGuard()
|
||||
{
|
||||
if (span_)
|
||||
span_->End();
|
||||
}
|
||||
|
||||
/** @return A mutable reference to the underlying span. */
|
||||
opentelemetry::trace::Span&
|
||||
span()
|
||||
{
|
||||
return *span_;
|
||||
}
|
||||
|
||||
/** @return A const reference to the underlying span. */
|
||||
opentelemetry::trace::Span const&
|
||||
span() const
|
||||
{
|
||||
return *span_;
|
||||
}
|
||||
|
||||
/** Mark the span status as OK. */
|
||||
void
|
||||
setOk()
|
||||
{
|
||||
span_->SetStatus(opentelemetry::trace::StatusCode::kOk);
|
||||
}
|
||||
|
||||
/** Set an explicit status code on the span.
|
||||
|
||||
@param code The OTel status code.
|
||||
@param description Optional human-readable status description.
|
||||
*/
|
||||
void
|
||||
setStatus(opentelemetry::trace::StatusCode code, std::string_view description = "")
|
||||
{
|
||||
span_->SetStatus(code, std::string(description));
|
||||
}
|
||||
|
||||
/** Set a key-value attribute on the span.
|
||||
|
||||
@param key Attribute name (e.g. "xrpl.rpc.command").
|
||||
@param value Attribute value (string, int, bool, etc.).
|
||||
*/
|
||||
template <typename T>
|
||||
void
|
||||
setAttribute(std::string_view key, T&& value)
|
||||
{
|
||||
span_->SetAttribute(
|
||||
opentelemetry::nostd::string_view(key.data(), key.size()), std::forward<T>(value));
|
||||
}
|
||||
|
||||
/** Add a named event to the span's timeline.
|
||||
|
||||
@param name Event name.
|
||||
*/
|
||||
void
|
||||
addEvent(std::string_view name)
|
||||
{
|
||||
span_->AddEvent(std::string(name));
|
||||
}
|
||||
|
||||
/** Record an exception as a span event following OTel semantic
|
||||
conventions, and mark the span status as error.
|
||||
|
||||
@param e The exception to record.
|
||||
*/
|
||||
void
|
||||
recordException(std::exception const& e)
|
||||
{
|
||||
span_->AddEvent(
|
||||
"exception",
|
||||
{{"exception.type", "std::exception"}, {"exception.message", std::string(e.what())}});
|
||||
span_->SetStatus(opentelemetry::trace::StatusCode::kError, e.what());
|
||||
}
|
||||
|
||||
/** Return the current OTel context.
|
||||
|
||||
Useful for creating child spans on a different thread by passing
|
||||
this context to Telemetry::startSpan(name, parentContext).
|
||||
*/
|
||||
opentelemetry::context::Context
|
||||
context() const
|
||||
{
|
||||
return opentelemetry::context::RuntimeContext::GetCurrent();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
209
include/xrpl/telemetry/Telemetry.h
Normal file
209
include/xrpl/telemetry/Telemetry.h
Normal file
@@ -0,0 +1,209 @@
|
||||
#pragma once
|
||||
|
||||
/** Abstract interface for OpenTelemetry distributed tracing.
|
||||
|
||||
Provides the Telemetry base class that all components use to create trace
|
||||
spans. Two implementations exist:
|
||||
|
||||
- TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled
|
||||
only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime.
|
||||
- NullTelemetry (NullTelemetry.cpp): no-op stub used when telemetry is
|
||||
disabled at compile time or runtime.
|
||||
|
||||
The Setup struct holds all configuration parsed from the [telemetry]
|
||||
section of xrpld.cfg. See TelemetryConfig.cpp for the parser and
|
||||
cfg/xrpld-example.cfg for the available options.
|
||||
|
||||
OTel SDK headers are conditionally included behind XRPL_ENABLE_TELEMETRY
|
||||
so that builds without telemetry have zero dependency on opentelemetry-cpp.
|
||||
*/
|
||||
|
||||
#include <xrpl/basics/BasicConfig.h>
|
||||
#include <xrpl/beast/utility/Journal.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
#include <opentelemetry/context/context.h>
|
||||
#include <opentelemetry/nostd/shared_ptr.h>
|
||||
#include <opentelemetry/trace/span.h>
|
||||
#include <opentelemetry/trace/tracer.h>
|
||||
#endif
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
class Telemetry
|
||||
{
|
||||
public:
|
||||
/** Configuration parsed from the [telemetry] section of xrpld.cfg.
|
||||
|
||||
All fields have sensible defaults so the section can be minimal
|
||||
or omitted entirely. See TelemetryConfig.cpp for the parser.
|
||||
*/
|
||||
struct Setup
|
||||
{
|
||||
/** Master switch: true to enable tracing at runtime. */
|
||||
bool enabled = false;
|
||||
|
||||
/** OTel resource attribute `service.name`. */
|
||||
std::string serviceName = "rippled";
|
||||
|
||||
/** OTel resource attribute `service.version` (set from BuildInfo). */
|
||||
std::string serviceVersion;
|
||||
|
||||
/** OTel resource attribute `service.instance.id` (defaults to node
|
||||
public key). */
|
||||
std::string serviceInstanceId;
|
||||
|
||||
/** Exporter type: currently only "otlp_http" is supported. */
|
||||
std::string exporterType = "otlp_http";
|
||||
|
||||
/** OTLP/HTTP endpoint URL where spans are sent. */
|
||||
std::string exporterEndpoint = "http://localhost:4318/v1/traces";
|
||||
|
||||
/** Whether to use TLS for the exporter connection. */
|
||||
bool useTls = false;
|
||||
|
||||
/** Path to a CA certificate bundle for TLS verification. */
|
||||
std::string tlsCertPath;
|
||||
|
||||
/** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything. */
|
||||
double samplingRatio = 1.0;
|
||||
|
||||
/** Maximum number of spans per batch export. */
|
||||
std::uint32_t batchSize = 512;
|
||||
|
||||
/** Delay between batch exports. */
|
||||
std::chrono::milliseconds batchDelay{5000};
|
||||
|
||||
/** Maximum number of spans queued before dropping. */
|
||||
std::uint32_t maxQueueSize = 2048;
|
||||
|
||||
/** Network identifier, added as an OTel resource attribute. */
|
||||
std::uint32_t networkId = 0;
|
||||
|
||||
/** Network type label (e.g. "mainnet", "testnet", "devnet"). */
|
||||
std::string networkType = "mainnet";
|
||||
|
||||
/** Enable tracing for transaction processing. */
|
||||
bool traceTransactions = true;
|
||||
|
||||
/** Enable tracing for consensus rounds. */
|
||||
bool traceConsensus = true;
|
||||
|
||||
/** Enable tracing for RPC request handling. */
|
||||
bool traceRpc = true;
|
||||
|
||||
/** Enable tracing for peer-to-peer messages (disabled by default
|
||||
due to high volume). */
|
||||
bool tracePeer = false;
|
||||
|
||||
/** Enable tracing for ledger close/accept. */
|
||||
bool traceLedger = true;
|
||||
};
|
||||
|
||||
virtual ~Telemetry() = default;
|
||||
|
||||
/** Initialize the tracing pipeline (exporter, processor, provider).
|
||||
Call after construction.
|
||||
*/
|
||||
virtual void
|
||||
start() = 0;
|
||||
|
||||
/** Flush pending spans and shut down the tracing pipeline.
|
||||
Call before destruction.
|
||||
*/
|
||||
virtual void
|
||||
stop() = 0;
|
||||
|
||||
/** @return true if this instance is actively exporting spans. */
|
||||
virtual bool
|
||||
isEnabled() const = 0;
|
||||
|
||||
/** @return true if transaction processing should be traced. */
|
||||
virtual bool
|
||||
shouldTraceTransactions() const = 0;
|
||||
|
||||
/** @return true if consensus rounds should be traced. */
|
||||
virtual bool
|
||||
shouldTraceConsensus() const = 0;
|
||||
|
||||
/** @return true if RPC request handling should be traced. */
|
||||
virtual bool
|
||||
shouldTraceRpc() const = 0;
|
||||
|
||||
/** @return true if peer-to-peer messages should be traced. */
|
||||
virtual bool
|
||||
shouldTracePeer() const = 0;
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
/** Get or create a named tracer instance.
|
||||
|
||||
@param name Tracer name used to identify the instrumentation library.
|
||||
@return A shared pointer to the Tracer.
|
||||
*/
|
||||
virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>
|
||||
getTracer(std::string_view name = "rippled") = 0;
|
||||
|
||||
/** Start a new span on the current thread's context.
|
||||
|
||||
The span becomes a child of the current active span (if any) via
|
||||
OpenTelemetry's context propagation.
|
||||
|
||||
@param name Span name (typically "rpc.command.<cmd>").
|
||||
@param kind The span kind (defaults to kInternal).
|
||||
@return A shared pointer to the new Span.
|
||||
*/
|
||||
virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(
|
||||
std::string_view name,
|
||||
opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
|
||||
|
||||
/** Start a new span with an explicit parent context.
|
||||
|
||||
Use this overload when the parent span is not on the current
|
||||
thread's context stack (e.g. cross-thread trace propagation).
|
||||
|
||||
@param name Span name.
|
||||
@param parentContext The parent span's context.
|
||||
@param kind The span kind (defaults to kInternal).
|
||||
@return A shared pointer to the new Span.
|
||||
*/
|
||||
virtual opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(
|
||||
std::string_view name,
|
||||
opentelemetry::context::Context const& parentContext,
|
||||
opentelemetry::trace::SpanKind kind = opentelemetry::trace::SpanKind::kInternal) = 0;
|
||||
#endif
|
||||
};
|
||||
|
||||
/** Create a Telemetry instance.
|
||||
|
||||
Returns a TelemetryImpl when setup.enabled is true, or a
|
||||
NullTelemetry no-op stub otherwise.
|
||||
|
||||
@param setup Configuration from the [telemetry] config section.
|
||||
@param journal Journal for log output during initialization.
|
||||
*/
|
||||
std::unique_ptr<Telemetry>
|
||||
make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal);
|
||||
|
||||
/** Parse the [telemetry] config section into a Setup struct.
|
||||
|
||||
@param section The [telemetry] config section.
|
||||
@param nodePublicKey Node public key, used as default instance ID.
|
||||
@param version Build version string.
|
||||
@return A populated Setup struct with defaults for missing values.
|
||||
*/
|
||||
Telemetry::Setup
|
||||
setup_Telemetry(
|
||||
Section const& section,
|
||||
std::string const& nodePublicKey,
|
||||
std::string const& version);
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
@@ -1,157 +0,0 @@
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/basics/MallocTrim.h>
|
||||
|
||||
#include <boost/predef.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
#include <sys/resource.h>
|
||||
|
||||
#include <malloc.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// Require RUSAGE_THREAD for thread-scoped page fault tracking
|
||||
#ifndef RUSAGE_THREAD
|
||||
#error "MallocTrim rusage instrumentation requires RUSAGE_THREAD on Linux/glibc"
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
|
||||
bool
|
||||
getRusageThread(struct rusage& ru)
|
||||
{
|
||||
return ::getrusage(RUSAGE_THREAD, &ru) == 0; // LCOV_EXCL_LINE
|
||||
}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
namespace xrpl {
|
||||
|
||||
namespace detail {
|
||||
|
||||
// cSpell:ignore statm
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
|
||||
inline int
|
||||
mallocTrimWithPad(std::size_t padBytes)
|
||||
{
|
||||
return ::malloc_trim(padBytes);
|
||||
}
|
||||
|
||||
long
|
||||
parseStatmRSSkB(std::string const& statm)
|
||||
{
|
||||
// /proc/self/statm format: size resident shared text lib data dt
|
||||
// We want the second field (resident) which is in pages
|
||||
std::istringstream iss(statm);
|
||||
long size, resident;
|
||||
if (!(iss >> size >> resident))
|
||||
return -1;
|
||||
|
||||
// Convert pages to KB
|
||||
long const pageSize = ::sysconf(_SC_PAGESIZE);
|
||||
if (pageSize <= 0)
|
||||
return -1;
|
||||
|
||||
return (resident * pageSize) / 1024;
|
||||
}
|
||||
|
||||
#endif // __GLIBC__ && BOOST_OS_LINUX
|
||||
|
||||
} // namespace detail
|
||||
|
||||
MallocTrimReport
|
||||
mallocTrim(std::string_view tag, beast::Journal journal)
|
||||
{
|
||||
// LCOV_EXCL_START
|
||||
|
||||
MallocTrimReport report;
|
||||
|
||||
#if !(defined(__GLIBC__) && BOOST_OS_LINUX)
|
||||
JLOG(journal.debug()) << "malloc_trim not supported on this platform (tag=" << tag << ")";
|
||||
#else
|
||||
// Keep glibc malloc_trim padding at 0 (default): 12h Mainnet tests across 0/256KB/1MB/16MB
|
||||
// showed no clear, consistent benefit from custom padding—0 provided the best overall balance
|
||||
// of RSS reduction and trim-latency stability without adding a tuning surface.
|
||||
constexpr std::size_t TRIM_PAD = 0;
|
||||
|
||||
report.supported = true;
|
||||
|
||||
if (journal.debug())
|
||||
{
|
||||
auto readFile = [](std::string const& path) -> std::string {
|
||||
std::ifstream ifs(path, std::ios::in | std::ios::binary);
|
||||
if (!ifs.is_open())
|
||||
return {};
|
||||
|
||||
// /proc files are often not seekable; read as a stream.
|
||||
std::ostringstream oss;
|
||||
oss << ifs.rdbuf();
|
||||
return oss.str();
|
||||
};
|
||||
|
||||
std::string const tagStr{tag};
|
||||
std::string const statmPath = "/proc/self/statm";
|
||||
|
||||
auto const statmBefore = readFile(statmPath);
|
||||
long const rssBeforeKB = detail::parseStatmRSSkB(statmBefore);
|
||||
|
||||
struct rusage ru0{};
|
||||
bool const have_ru0 = getRusageThread(ru0);
|
||||
|
||||
auto const t0 = std::chrono::steady_clock::now();
|
||||
|
||||
report.trimResult = detail::mallocTrimWithPad(TRIM_PAD);
|
||||
|
||||
auto const t1 = std::chrono::steady_clock::now();
|
||||
|
||||
struct rusage ru1{};
|
||||
bool const have_ru1 = getRusageThread(ru1);
|
||||
|
||||
auto const statmAfter = readFile(statmPath);
|
||||
long const rssAfterKB = detail::parseStatmRSSkB(statmAfter);
|
||||
|
||||
// Populate report fields
|
||||
report.rssBeforeKB = rssBeforeKB;
|
||||
report.rssAfterKB = rssAfterKB;
|
||||
report.durationUs = std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0);
|
||||
|
||||
if (have_ru0 && have_ru1)
|
||||
{
|
||||
report.minfltDelta = ru1.ru_minflt - ru0.ru_minflt;
|
||||
report.majfltDelta = ru1.ru_majflt - ru0.ru_majflt;
|
||||
}
|
||||
|
||||
std::int64_t const deltaKB = (rssBeforeKB < 0 || rssAfterKB < 0)
|
||||
? 0
|
||||
: (static_cast<std::int64_t>(rssAfterKB) - static_cast<std::int64_t>(rssBeforeKB));
|
||||
|
||||
JLOG(journal.debug()) << "malloc_trim tag=" << tagStr << " result=" << report.trimResult
|
||||
<< " pad=" << TRIM_PAD << " bytes"
|
||||
<< " rss_before=" << rssBeforeKB << "kB"
|
||||
<< " rss_after=" << rssAfterKB << "kB"
|
||||
<< " delta=" << deltaKB << "kB"
|
||||
<< " duration_us=" << report.durationUs.count()
|
||||
<< " minflt_delta=" << report.minfltDelta
|
||||
<< " majflt_delta=" << report.majfltDelta;
|
||||
}
|
||||
else
|
||||
{
|
||||
report.trimResult = detail::mallocTrimWithPad(TRIM_PAD);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
return report;
|
||||
|
||||
// LCOV_EXCL_STOP
|
||||
}
|
||||
|
||||
} // namespace xrpl
|
||||
@@ -33,7 +33,7 @@ DatabaseNodeImp::fetchNodeObject(
|
||||
|
||||
try
|
||||
{
|
||||
status = backend_->fetch(hash, &nodeObject);
|
||||
status = backend_->fetch(hash.data(), &nodeObject);
|
||||
}
|
||||
catch (std::exception const& e)
|
||||
{
|
||||
@@ -68,10 +68,18 @@ DatabaseNodeImp::fetchBatch(std::vector<uint256> const& hashes)
|
||||
using namespace std::chrono;
|
||||
auto const before = steady_clock::now();
|
||||
|
||||
std::vector<uint256 const*> batch{};
|
||||
batch.reserve(hashes.size());
|
||||
for (size_t i = 0; i < hashes.size(); ++i)
|
||||
{
|
||||
auto const& hash = hashes[i];
|
||||
batch.push_back(&hash);
|
||||
}
|
||||
|
||||
// Get the node objects that match the hashes from the backend. To protect
|
||||
// against the backends returning fewer or more results than expected, the
|
||||
// container is resized to the number of hashes.
|
||||
auto results = backend_->fetchBatch(hashes).first;
|
||||
auto results = backend_->fetchBatch(batch).first;
|
||||
XRPL_ASSERT(
|
||||
results.size() == hashes.size() || results.empty(),
|
||||
"number of output objects either matches number of input hashes or is empty");
|
||||
|
||||
@@ -105,7 +105,7 @@ DatabaseRotatingImp::fetchNodeObject(
|
||||
std::shared_ptr<NodeObject> nodeObject;
|
||||
try
|
||||
{
|
||||
status = backend->fetch(hash, &nodeObject);
|
||||
status = backend->fetch(hash.data(), &nodeObject);
|
||||
}
|
||||
catch (std::exception const& e)
|
||||
{
|
||||
|
||||
@@ -116,9 +116,10 @@ public:
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
Status
|
||||
fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pObject) override
|
||||
fetch(void const* key, std::shared_ptr<NodeObject>* pObject) override
|
||||
{
|
||||
XRPL_ASSERT(db_, "xrpl::NodeStore::MemoryBackend::fetch : non-null database");
|
||||
uint256 const hash(uint256::fromVoid(key));
|
||||
|
||||
std::lock_guard _(db_->mutex);
|
||||
|
||||
@@ -133,14 +134,14 @@ public:
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
|
||||
fetchBatch(std::vector<uint256> const& hashes) override
|
||||
fetchBatch(std::vector<uint256 const*> const& hashes) override
|
||||
{
|
||||
std::vector<std::shared_ptr<NodeObject>> results;
|
||||
results.reserve(hashes.size());
|
||||
for (auto const& h : hashes)
|
||||
{
|
||||
std::shared_ptr<NodeObject> nObj;
|
||||
Status status = fetch(h, &nObj);
|
||||
Status status = fetch(h->begin(), &nObj);
|
||||
if (status != ok)
|
||||
results.push_back({});
|
||||
else
|
||||
|
||||
@@ -179,17 +179,17 @@ public:
|
||||
}
|
||||
|
||||
Status
|
||||
fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pno) override
|
||||
fetch(void const* key, std::shared_ptr<NodeObject>* pno) override
|
||||
{
|
||||
Status status;
|
||||
pno->reset();
|
||||
nudb::error_code ec;
|
||||
db_.fetch(
|
||||
hash.data(),
|
||||
[&hash, pno, &status](void const* data, std::size_t size) {
|
||||
key,
|
||||
[key, pno, &status](void const* data, std::size_t size) {
|
||||
nudb::detail::buffer bf;
|
||||
auto const result = nodeobject_decompress(data, size, bf);
|
||||
DecodedBlob decoded(hash.data(), result.first, result.second);
|
||||
DecodedBlob decoded(key, result.first, result.second);
|
||||
if (!decoded.wasOk())
|
||||
{
|
||||
status = dataCorrupt;
|
||||
@@ -207,14 +207,14 @@ public:
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
|
||||
fetchBatch(std::vector<uint256> const& hashes) override
|
||||
fetchBatch(std::vector<uint256 const*> const& hashes) override
|
||||
{
|
||||
std::vector<std::shared_ptr<NodeObject>> results;
|
||||
results.reserve(hashes.size());
|
||||
for (auto const& h : hashes)
|
||||
{
|
||||
std::shared_ptr<NodeObject> nObj;
|
||||
Status status = fetch(h, &nObj);
|
||||
Status status = fetch(h->begin(), &nObj);
|
||||
if (status != ok)
|
||||
results.push_back({});
|
||||
else
|
||||
|
||||
@@ -36,13 +36,13 @@ public:
|
||||
}
|
||||
|
||||
Status
|
||||
fetch(uint256 const&, std::shared_ptr<NodeObject>*) override
|
||||
fetch(void const*, std::shared_ptr<NodeObject>*) override
|
||||
{
|
||||
return notFound;
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
|
||||
fetchBatch(std::vector<uint256> const& hashes) override
|
||||
fetchBatch(std::vector<uint256 const*> const& hashes) override
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -244,7 +244,7 @@ public:
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
Status
|
||||
fetch(uint256 const& hash, std::shared_ptr<NodeObject>* pObject) override
|
||||
fetch(void const* key, std::shared_ptr<NodeObject>* pObject) override
|
||||
{
|
||||
XRPL_ASSERT(m_db, "xrpl::NodeStore::RocksDBBackend::fetch : non-null database");
|
||||
pObject->reset();
|
||||
@@ -252,7 +252,7 @@ public:
|
||||
Status status(ok);
|
||||
|
||||
rocksdb::ReadOptions const options;
|
||||
rocksdb::Slice const slice(std::bit_cast<char const*>(hash.data()), m_keyBytes);
|
||||
rocksdb::Slice const slice(static_cast<char const*>(key), m_keyBytes);
|
||||
|
||||
std::string string;
|
||||
|
||||
@@ -260,7 +260,7 @@ public:
|
||||
|
||||
if (getStatus.ok())
|
||||
{
|
||||
DecodedBlob decoded(hash.data(), string.data(), string.size());
|
||||
DecodedBlob decoded(key, string.data(), string.size());
|
||||
|
||||
if (decoded.wasOk())
|
||||
{
|
||||
@@ -295,14 +295,14 @@ public:
|
||||
}
|
||||
|
||||
std::pair<std::vector<std::shared_ptr<NodeObject>>, Status>
|
||||
fetchBatch(std::vector<uint256> const& hashes) override
|
||||
fetchBatch(std::vector<uint256 const*> const& hashes) override
|
||||
{
|
||||
std::vector<std::shared_ptr<NodeObject>> results;
|
||||
results.reserve(hashes.size());
|
||||
for (auto const& h : hashes)
|
||||
{
|
||||
std::shared_ptr<NodeObject> nObj;
|
||||
Status status = fetch(h, &nObj);
|
||||
Status status = fetch(h->begin(), &nObj);
|
||||
if (status != ok)
|
||||
results.push_back({});
|
||||
else
|
||||
@@ -332,8 +332,9 @@ public:
|
||||
EncodedBlob encoded(e);
|
||||
|
||||
wb.Put(
|
||||
rocksdb::Slice(std::bit_cast<char const*>(encoded.getKey()), m_keyBytes),
|
||||
rocksdb::Slice(std::bit_cast<char const*>(encoded.getData()), encoded.getSize()));
|
||||
rocksdb::Slice(reinterpret_cast<char const*>(encoded.getKey()), m_keyBytes),
|
||||
rocksdb::Slice(
|
||||
reinterpret_cast<char const*>(encoded.getData()), encoded.getSize()));
|
||||
}
|
||||
|
||||
rocksdb::WriteOptions const options;
|
||||
|
||||
121
src/libxrpl/telemetry/NullTelemetry.cpp
Normal file
121
src/libxrpl/telemetry/NullTelemetry.cpp
Normal file
@@ -0,0 +1,121 @@
|
||||
/** No-op implementation of the Telemetry interface.
|
||||
|
||||
Always compiled (regardless of XRPL_ENABLE_TELEMETRY). Provides the
|
||||
make_Telemetry() factory when telemetry is compiled out (#ifndef), which
|
||||
unconditionally returns a NullTelemetry that does nothing.
|
||||
|
||||
When XRPL_ENABLE_TELEMETRY IS defined, the OTel virtual methods
|
||||
(getTracer, startSpan) return noop tracers/spans. The make_Telemetry()
|
||||
factory in this file is not used in that case -- Telemetry.cpp provides
|
||||
its own factory that can return the real TelemetryImpl.
|
||||
*/
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
#include <opentelemetry/trace/noop.h>
|
||||
#endif
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
namespace {
|
||||
|
||||
/** No-op Telemetry that returns immediately from every method.
|
||||
|
||||
Used as the sole implementation when XRPL_ENABLE_TELEMETRY is not
|
||||
defined, or as a fallback when it is defined but enabled=0.
|
||||
*/
|
||||
class NullTelemetry : public Telemetry
|
||||
{
|
||||
/** Retained configuration (unused, kept for diagnostic access). */
|
||||
Setup const setup_;
|
||||
|
||||
public:
|
||||
explicit NullTelemetry(Setup const& setup) : setup_(setup)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
start() override
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
stop() override
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
isEnabled() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceTransactions() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceConsensus() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceRpc() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTracePeer() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>
|
||||
getTracer(std::string_view) override
|
||||
{
|
||||
static auto noopTracer = opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>(
|
||||
new opentelemetry::trace::NoopTracer());
|
||||
return noopTracer;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(std::string_view, opentelemetry::trace::SpanKind) override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>(
|
||||
new opentelemetry::trace::NoopSpan(nullptr));
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>
|
||||
startSpan(
|
||||
std::string_view,
|
||||
opentelemetry::context::Context const&,
|
||||
opentelemetry::trace::SpanKind) override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span>(
|
||||
new opentelemetry::trace::NoopSpan(nullptr));
|
||||
}
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
/** Factory used when XRPL_ENABLE_TELEMETRY is not defined.
|
||||
Unconditionally returns a NullTelemetry instance.
|
||||
*/
|
||||
#ifndef XRPL_ENABLE_TELEMETRY
|
||||
std::unique_ptr<Telemetry>
|
||||
make_Telemetry(Telemetry::Setup const& setup, beast::Journal)
|
||||
{
|
||||
return std::make_unique<NullTelemetry>(setup);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
279
src/libxrpl/telemetry/Telemetry.cpp
Normal file
279
src/libxrpl/telemetry/Telemetry.cpp
Normal file
@@ -0,0 +1,279 @@
|
||||
/** OpenTelemetry SDK implementation of the Telemetry interface.
|
||||
|
||||
Compiled only when XRPL_ENABLE_TELEMETRY is defined (via CMake
|
||||
telemetry=ON). Contains:
|
||||
|
||||
- TelemetryImpl: configures the OTel SDK with an OTLP/HTTP exporter,
|
||||
batch span processor, trace-ID-ratio sampler, and resource attributes.
|
||||
- NullTelemetryOtel: no-op fallback used when telemetry is compiled in
|
||||
but disabled at runtime (enabled=0 in config).
|
||||
- make_Telemetry(): factory that selects the appropriate implementation.
|
||||
*/
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#include <opentelemetry/exporters/otlp/otlp_http_exporter_factory.h>
|
||||
#include <opentelemetry/exporters/otlp/otlp_http_exporter_options.h>
|
||||
#include <opentelemetry/sdk/resource/semantic_conventions.h>
|
||||
#include <opentelemetry/sdk/trace/batch_span_processor_factory.h>
|
||||
#include <opentelemetry/sdk/trace/batch_span_processor_options.h>
|
||||
#include <opentelemetry/sdk/trace/sampler.h>
|
||||
#include <opentelemetry/sdk/trace/samplers/trace_id_ratio.h>
|
||||
#include <opentelemetry/sdk/trace/tracer_provider.h>
|
||||
#include <opentelemetry/sdk/trace/tracer_provider_factory.h>
|
||||
#include <opentelemetry/trace/noop.h>
|
||||
#include <opentelemetry/trace/provider.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
namespace {
|
||||
|
||||
namespace trace_api = opentelemetry::trace;
|
||||
namespace trace_sdk = opentelemetry::sdk::trace;
|
||||
namespace otlp_http = opentelemetry::exporter::otlp;
|
||||
namespace resource = opentelemetry::sdk::resource;
|
||||
|
||||
/** No-op implementation used when XRPL_ENABLE_TELEMETRY is defined but
|
||||
setup.enabled is false at runtime.
|
||||
|
||||
Lives in the anonymous namespace so there is no ODR conflict with the
|
||||
NullTelemetry in NullTelemetry.cpp.
|
||||
*/
|
||||
class NullTelemetryOtel : public Telemetry
|
||||
{
|
||||
/** Retained configuration (unused, kept for diagnostic access). */
|
||||
Setup const setup_;
|
||||
|
||||
public:
|
||||
explicit NullTelemetryOtel(Setup const& setup) : setup_(setup)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
start() override
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
stop() override
|
||||
{
|
||||
}
|
||||
|
||||
bool
|
||||
isEnabled() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceTransactions() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceConsensus() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceRpc() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTracePeer() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>
|
||||
getTracer(std::string_view) override
|
||||
{
|
||||
static auto noopTracer =
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>(new trace_api::NoopTracer());
|
||||
return noopTracer;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(std::string_view, trace_api::SpanKind) override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<trace_api::Span>(new trace_api::NoopSpan(nullptr));
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(std::string_view, opentelemetry::context::Context const&, trace_api::SpanKind)
|
||||
override
|
||||
{
|
||||
return opentelemetry::nostd::shared_ptr<trace_api::Span>(new trace_api::NoopSpan(nullptr));
|
||||
}
|
||||
};
|
||||
|
||||
/** Full OTel SDK implementation that exports trace spans via OTLP/HTTP.
|
||||
|
||||
Configures an OTLP/HTTP exporter, batch span processor,
|
||||
TraceIdRatioBasedSampler, and resource attributes on start().
|
||||
*/
|
||||
class TelemetryImpl : public Telemetry
|
||||
{
|
||||
/** Configuration from the [telemetry] config section. */
|
||||
Setup const setup_;
|
||||
|
||||
/** Journal used for log output during start/stop. */
|
||||
beast::Journal const journal_;
|
||||
|
||||
/** The SDK TracerProvider that owns the export pipeline.
|
||||
|
||||
Held as std::shared_ptr so we can call ForceFlush() on shutdown.
|
||||
Wrapped in a nostd::shared_ptr when registered as the global provider.
|
||||
*/
|
||||
std::shared_ptr<trace_sdk::TracerProvider> sdkProvider_;
|
||||
|
||||
public:
|
||||
TelemetryImpl(Setup const& setup, beast::Journal journal) : setup_(setup), journal_(journal)
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
start() override
|
||||
{
|
||||
JLOG(journal_.info()) << "Telemetry starting: endpoint=" << setup_.exporterEndpoint
|
||||
<< " sampling=" << setup_.samplingRatio;
|
||||
|
||||
// Configure OTLP HTTP exporter
|
||||
otlp_http::OtlpHttpExporterOptions exporterOpts;
|
||||
exporterOpts.url = setup_.exporterEndpoint;
|
||||
if (setup_.useTls)
|
||||
exporterOpts.ssl_ca_cert_path = setup_.tlsCertPath;
|
||||
|
||||
auto exporter = otlp_http::OtlpHttpExporterFactory::Create(exporterOpts);
|
||||
|
||||
// Configure batch processor
|
||||
trace_sdk::BatchSpanProcessorOptions processorOpts;
|
||||
processorOpts.max_queue_size = setup_.maxQueueSize;
|
||||
processorOpts.schedule_delay_millis = std::chrono::milliseconds(setup_.batchDelay);
|
||||
processorOpts.max_export_batch_size = setup_.batchSize;
|
||||
|
||||
auto processor =
|
||||
trace_sdk::BatchSpanProcessorFactory::Create(std::move(exporter), processorOpts);
|
||||
|
||||
// Configure resource attributes
|
||||
auto resourceAttrs = resource::Resource::Create({
|
||||
{resource::SemanticConventions::kServiceName, setup_.serviceName},
|
||||
{resource::SemanticConventions::kServiceVersion, setup_.serviceVersion},
|
||||
{resource::SemanticConventions::kServiceInstanceId, setup_.serviceInstanceId},
|
||||
{"xrpl.network.id", static_cast<int64_t>(setup_.networkId)},
|
||||
{"xrpl.network.type", setup_.networkType},
|
||||
});
|
||||
|
||||
// Configure sampler
|
||||
auto sampler = std::make_unique<trace_sdk::TraceIdRatioBasedSampler>(setup_.samplingRatio);
|
||||
|
||||
// Create TracerProvider
|
||||
sdkProvider_ = trace_sdk::TracerProviderFactory::Create(
|
||||
std::move(processor), resourceAttrs, std::move(sampler));
|
||||
|
||||
// Set as global provider
|
||||
trace_api::Provider::SetTracerProvider(
|
||||
opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(sdkProvider_));
|
||||
|
||||
JLOG(journal_.info()) << "Telemetry started successfully";
|
||||
}
|
||||
|
||||
void
|
||||
stop() override
|
||||
{
|
||||
JLOG(journal_.info()) << "Telemetry stopping";
|
||||
if (sdkProvider_)
|
||||
{
|
||||
// Force flush before shutdown
|
||||
sdkProvider_->ForceFlush();
|
||||
sdkProvider_.reset();
|
||||
trace_api::Provider::SetTracerProvider(
|
||||
opentelemetry::nostd::shared_ptr<trace_api::TracerProvider>(
|
||||
new trace_api::NoopTracerProvider()));
|
||||
}
|
||||
JLOG(journal_.info()) << "Telemetry stopped";
|
||||
}
|
||||
|
||||
bool
|
||||
isEnabled() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceTransactions() const override
|
||||
{
|
||||
return setup_.traceTransactions;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceConsensus() const override
|
||||
{
|
||||
return setup_.traceConsensus;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceRpc() const override
|
||||
{
|
||||
return setup_.traceRpc;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTracePeer() const override
|
||||
{
|
||||
return setup_.tracePeer;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>
|
||||
getTracer(std::string_view name) override
|
||||
{
|
||||
if (!sdkProvider_)
|
||||
return trace_api::Provider::GetTracerProvider()->GetTracer(std::string(name));
|
||||
return sdkProvider_->GetTracer(std::string(name));
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(std::string_view name, trace_api::SpanKind kind) override
|
||||
{
|
||||
auto tracer = getTracer("rippled");
|
||||
trace_api::StartSpanOptions opts;
|
||||
opts.kind = kind;
|
||||
return tracer->StartSpan(std::string(name), opts);
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(
|
||||
std::string_view name,
|
||||
opentelemetry::context::Context const& parentContext,
|
||||
trace_api::SpanKind kind) override
|
||||
{
|
||||
auto tracer = getTracer("rippled");
|
||||
trace_api::StartSpanOptions opts;
|
||||
opts.kind = kind;
|
||||
opts.parent = parentContext;
|
||||
return tracer->StartSpan(std::string(name), opts);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
std::unique_ptr<Telemetry>
|
||||
make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal)
|
||||
{
|
||||
if (setup.enabled)
|
||||
return std::make_unique<TelemetryImpl>(setup, journal);
|
||||
return std::make_unique<NullTelemetryOtel>(setup);
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
52
src/libxrpl/telemetry/TelemetryConfig.cpp
Normal file
52
src/libxrpl/telemetry/TelemetryConfig.cpp
Normal file
@@ -0,0 +1,52 @@
|
||||
/** Parser for the [telemetry] section of xrpld.cfg.
|
||||
|
||||
Reads configuration values from the config file and populates a
|
||||
Telemetry::Setup struct. All options have sensible defaults so the
|
||||
section can be minimal or omitted entirely.
|
||||
|
||||
See cfg/xrpld-example.cfg for the full list of available options.
|
||||
*/
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
Telemetry::Setup
|
||||
setup_Telemetry(
|
||||
Section const& section,
|
||||
std::string const& nodePublicKey,
|
||||
std::string const& version)
|
||||
{
|
||||
Telemetry::Setup setup;
|
||||
|
||||
setup.enabled = section.value_or<int>("enabled", 0) != 0;
|
||||
setup.serviceName = section.value_or<std::string>("service_name", "rippled");
|
||||
setup.serviceVersion = version;
|
||||
setup.serviceInstanceId = section.value_or<std::string>("service_instance_id", nodePublicKey);
|
||||
|
||||
setup.exporterType = section.value_or<std::string>("exporter", "otlp_http");
|
||||
setup.exporterEndpoint =
|
||||
section.value_or<std::string>("endpoint", "http://localhost:4318/v1/traces");
|
||||
|
||||
setup.useTls = section.value_or<int>("use_tls", 0) != 0;
|
||||
setup.tlsCertPath = section.value_or<std::string>("tls_ca_cert", "");
|
||||
|
||||
setup.samplingRatio = section.value_or<double>("sampling_ratio", 1.0);
|
||||
|
||||
setup.batchSize = section.value_or<std::uint32_t>("batch_size", 512u);
|
||||
setup.batchDelay =
|
||||
std::chrono::milliseconds{section.value_or<std::uint32_t>("batch_delay_ms", 5000u)};
|
||||
setup.maxQueueSize = section.value_or<std::uint32_t>("max_queue_size", 2048u);
|
||||
|
||||
setup.traceTransactions = section.value_or<int>("trace_transactions", 1) != 0;
|
||||
setup.traceConsensus = section.value_or<int>("trace_consensus", 1) != 0;
|
||||
setup.traceRpc = section.value_or<int>("trace_rpc", 1) != 0;
|
||||
setup.tracePeer = section.value_or<int>("trace_peer", 0) != 0;
|
||||
setup.traceLedger = section.value_or<int>("trace_ledger", 1) != 0;
|
||||
|
||||
return setup;
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
@@ -138,7 +138,7 @@ public:
|
||||
{
|
||||
std::shared_ptr<NodeObject> object;
|
||||
|
||||
Status const status = backend.fetch(batch[i]->getHash(), &object);
|
||||
Status const status = backend.fetch(batch[i]->getHash().cbegin(), &object);
|
||||
|
||||
BEAST_EXPECT(status == ok);
|
||||
|
||||
@@ -158,7 +158,7 @@ public:
|
||||
{
|
||||
std::shared_ptr<NodeObject> object;
|
||||
|
||||
Status const status = backend.fetch(batch[i]->getHash(), &object);
|
||||
Status const status = backend.fetch(batch[i]->getHash().cbegin(), &object);
|
||||
|
||||
BEAST_EXPECT(status == notFound);
|
||||
}
|
||||
|
||||
@@ -314,7 +314,7 @@ public:
|
||||
std::shared_ptr<NodeObject> obj;
|
||||
std::shared_ptr<NodeObject> result;
|
||||
obj = seq1_.obj(dist_(gen_));
|
||||
backend_.fetch(obj->getHash(), &result);
|
||||
backend_.fetch(obj->getHash().data(), &result);
|
||||
suite_.expect(result && isSame(result, obj));
|
||||
}
|
||||
catch (std::exception const& e)
|
||||
@@ -377,9 +377,9 @@ public:
|
||||
{
|
||||
try
|
||||
{
|
||||
auto const hash = seq2_.key(i);
|
||||
auto const key = seq2_.key(i);
|
||||
std::shared_ptr<NodeObject> result;
|
||||
backend_.fetch(hash, &result);
|
||||
backend_.fetch(key.data(), &result);
|
||||
suite_.expect(!result);
|
||||
}
|
||||
catch (std::exception const& e)
|
||||
@@ -449,9 +449,9 @@ public:
|
||||
{
|
||||
if (rand_(gen_) < missingNodePercent)
|
||||
{
|
||||
auto const hash = seq2_.key(dist_(gen_));
|
||||
auto const key = seq2_.key(dist_(gen_));
|
||||
std::shared_ptr<NodeObject> result;
|
||||
backend_.fetch(hash, &result);
|
||||
backend_.fetch(key.data(), &result);
|
||||
suite_.expect(!result);
|
||||
}
|
||||
else
|
||||
@@ -459,7 +459,7 @@ public:
|
||||
std::shared_ptr<NodeObject> obj;
|
||||
std::shared_ptr<NodeObject> result;
|
||||
obj = seq1_.obj(dist_(gen_));
|
||||
backend_.fetch(obj->getHash(), &result);
|
||||
backend_.fetch(obj->getHash().data(), &result);
|
||||
suite_.expect(result && isSame(result, obj));
|
||||
}
|
||||
}
|
||||
@@ -540,7 +540,8 @@ public:
|
||||
std::shared_ptr<NodeObject> result;
|
||||
auto const j = older_(gen_);
|
||||
obj = seq1_.obj(j);
|
||||
backend_.fetch(obj->getHash(), &result);
|
||||
std::shared_ptr<NodeObject> result1;
|
||||
backend_.fetch(obj->getHash().data(), &result);
|
||||
suite_.expect(result != nullptr);
|
||||
suite_.expect(isSame(result, obj));
|
||||
}
|
||||
@@ -558,7 +559,7 @@ public:
|
||||
std::shared_ptr<NodeObject> result;
|
||||
auto const j = recent_(gen_);
|
||||
obj = seq1_.obj(j);
|
||||
backend_.fetch(obj->getHash(), &result);
|
||||
backend_.fetch(obj->getHash().data(), &result);
|
||||
suite_.expect(!result || isSame(result, obj));
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1,209 +0,0 @@
|
||||
#include <xrpl/basics/MallocTrim.h>
|
||||
|
||||
#include <boost/predef.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
using namespace xrpl;
|
||||
|
||||
// cSpell:ignore statm
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
namespace xrpl::detail {
|
||||
long
|
||||
parseStatmRSSkB(std::string const& statm);
|
||||
} // namespace xrpl::detail
|
||||
#endif
|
||||
|
||||
TEST(MallocTrimReport, structure)
|
||||
{
|
||||
// Test default construction
|
||||
MallocTrimReport report;
|
||||
EXPECT_EQ(report.supported, false);
|
||||
EXPECT_EQ(report.trimResult, -1);
|
||||
EXPECT_EQ(report.rssBeforeKB, -1);
|
||||
EXPECT_EQ(report.rssAfterKB, -1);
|
||||
EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
|
||||
EXPECT_EQ(report.minfltDelta, -1);
|
||||
EXPECT_EQ(report.majfltDelta, -1);
|
||||
EXPECT_EQ(report.deltaKB(), 0);
|
||||
|
||||
// Test deltaKB calculation - memory freed
|
||||
report.rssBeforeKB = 1000;
|
||||
report.rssAfterKB = 800;
|
||||
EXPECT_EQ(report.deltaKB(), -200);
|
||||
|
||||
// Test deltaKB calculation - memory increased
|
||||
report.rssBeforeKB = 500;
|
||||
report.rssAfterKB = 600;
|
||||
EXPECT_EQ(report.deltaKB(), 100);
|
||||
|
||||
// Test deltaKB calculation - no change
|
||||
report.rssBeforeKB = 1234;
|
||||
report.rssAfterKB = 1234;
|
||||
EXPECT_EQ(report.deltaKB(), 0);
|
||||
}
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
TEST(parseStatmRSSkB, standard_format)
|
||||
{
|
||||
using xrpl::detail::parseStatmRSSkB;
|
||||
|
||||
// Test standard format: size resident shared text lib data dt
|
||||
// Assuming 4KB page size: resident=1000 pages = 4000 KB
|
||||
{
|
||||
std::string statm = "25365 1000 2377 0 0 5623 0";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
// Note: actual result depends on system page size
|
||||
// On most systems it's 4KB, so 1000 pages = 4000 KB
|
||||
EXPECT_GT(result, 0);
|
||||
}
|
||||
|
||||
// Test with newline
|
||||
{
|
||||
std::string statm = "12345 2000 1234 0 0 3456 0\n";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_GT(result, 0);
|
||||
}
|
||||
|
||||
// Test with tabs
|
||||
{
|
||||
std::string statm = "12345\t2000\t1234\t0\t0\t3456\t0";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_GT(result, 0);
|
||||
}
|
||||
|
||||
// Test zero resident pages
|
||||
{
|
||||
std::string statm = "25365 0 2377 0 0 5623 0";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_EQ(result, 0);
|
||||
}
|
||||
|
||||
// Test with extra whitespace
|
||||
{
|
||||
std::string statm = " 25365 1000 2377 ";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_GT(result, 0);
|
||||
}
|
||||
|
||||
// Test empty string
|
||||
{
|
||||
std::string statm = "";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_EQ(result, -1);
|
||||
}
|
||||
|
||||
// Test malformed data (only one field)
|
||||
{
|
||||
std::string statm = "25365";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_EQ(result, -1);
|
||||
}
|
||||
|
||||
// Test malformed data (non-numeric)
|
||||
{
|
||||
std::string statm = "abc def ghi";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_EQ(result, -1);
|
||||
}
|
||||
|
||||
// Test malformed data (second field non-numeric)
|
||||
{
|
||||
std::string statm = "25365 abc 2377";
|
||||
long result = parseStatmRSSkB(statm);
|
||||
EXPECT_EQ(result, -1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(mallocTrim, without_debug_logging)
|
||||
{
|
||||
beast::Journal journal{beast::Journal::getNullSink()};
|
||||
|
||||
MallocTrimReport report = mallocTrim("without_debug", journal);
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
EXPECT_EQ(report.supported, true);
|
||||
EXPECT_GE(report.trimResult, 0);
|
||||
EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
|
||||
EXPECT_EQ(report.minfltDelta, -1);
|
||||
EXPECT_EQ(report.majfltDelta, -1);
|
||||
#else
|
||||
EXPECT_EQ(report.supported, false);
|
||||
EXPECT_EQ(report.trimResult, -1);
|
||||
EXPECT_EQ(report.rssBeforeKB, -1);
|
||||
EXPECT_EQ(report.rssAfterKB, -1);
|
||||
EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
|
||||
EXPECT_EQ(report.minfltDelta, -1);
|
||||
EXPECT_EQ(report.majfltDelta, -1);
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST(mallocTrim, empty_tag)
|
||||
{
|
||||
beast::Journal journal{beast::Journal::getNullSink()};
|
||||
MallocTrimReport report = mallocTrim("", journal);
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
EXPECT_EQ(report.supported, true);
|
||||
EXPECT_GE(report.trimResult, 0);
|
||||
#else
|
||||
EXPECT_EQ(report.supported, false);
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST(mallocTrim, with_debug_logging)
|
||||
{
|
||||
struct DebugSink : public beast::Journal::Sink
|
||||
{
|
||||
DebugSink() : Sink(beast::severities::kDebug, false)
|
||||
{
|
||||
}
|
||||
void
|
||||
write(beast::severities::Severity, std::string const&) override
|
||||
{
|
||||
}
|
||||
void
|
||||
writeAlways(beast::severities::Severity, std::string const&) override
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
DebugSink sink;
|
||||
beast::Journal journal{sink};
|
||||
|
||||
MallocTrimReport report = mallocTrim("debug_test", journal);
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
EXPECT_EQ(report.supported, true);
|
||||
EXPECT_GE(report.trimResult, 0);
|
||||
EXPECT_GE(report.durationUs.count(), 0);
|
||||
EXPECT_GE(report.minfltDelta, 0);
|
||||
EXPECT_GE(report.majfltDelta, 0);
|
||||
#else
|
||||
EXPECT_EQ(report.supported, false);
|
||||
EXPECT_EQ(report.trimResult, -1);
|
||||
EXPECT_EQ(report.durationUs, std::chrono::microseconds{-1});
|
||||
EXPECT_EQ(report.minfltDelta, -1);
|
||||
EXPECT_EQ(report.majfltDelta, -1);
|
||||
#endif
|
||||
}
|
||||
|
||||
TEST(mallocTrim, repeated_calls)
|
||||
{
|
||||
beast::Journal journal{beast::Journal::getNullSink()};
|
||||
|
||||
// Call malloc_trim multiple times to ensure it's safe
|
||||
for (int i = 0; i < 5; ++i)
|
||||
{
|
||||
MallocTrimReport report = mallocTrim("iteration_" + std::to_string(i), journal);
|
||||
|
||||
#if defined(__GLIBC__) && BOOST_OS_LINUX
|
||||
EXPECT_EQ(report.supported, true);
|
||||
EXPECT_GE(report.trimResult, 0);
|
||||
#else
|
||||
EXPECT_EQ(report.supported, false);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -31,7 +31,6 @@
|
||||
#include <xrpld/shamap/NodeFamily.h>
|
||||
|
||||
#include <xrpl/basics/ByteUtilities.h>
|
||||
#include <xrpl/basics/MallocTrim.h>
|
||||
#include <xrpl/basics/ResolverAsio.h>
|
||||
#include <xrpl/basics/random.h>
|
||||
#include <xrpl/beast/asio/io_latency_probe.h>
|
||||
@@ -52,6 +51,7 @@
|
||||
#include <xrpl/resource/Fees.h>
|
||||
#include <xrpl/server/LoadFeeTrack.h>
|
||||
#include <xrpl/server/Wallet.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
#include <xrpl/tx/apply.h>
|
||||
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
@@ -147,6 +147,7 @@ public:
|
||||
|
||||
beast::Journal m_journal;
|
||||
std::unique_ptr<perf::PerfLog> perfLog_;
|
||||
std::unique_ptr<telemetry::Telemetry> telemetry_;
|
||||
Application::MutexType m_masterMutex;
|
||||
|
||||
// Required by the SHAMapStore
|
||||
@@ -258,6 +259,14 @@ public:
|
||||
logs_->journal("PerfLog"),
|
||||
[this] { signalStop("PerfLog"); }))
|
||||
|
||||
, telemetry_(
|
||||
telemetry::make_Telemetry(
|
||||
telemetry::setup_Telemetry(
|
||||
config_->section("telemetry"),
|
||||
"", // nodePublicKey not yet available at this point
|
||||
BuildInfo::getVersionString()),
|
||||
logs_->journal("Telemetry")))
|
||||
|
||||
, m_txMaster(*this)
|
||||
|
||||
, m_collectorManager(
|
||||
@@ -618,6 +627,12 @@ public:
|
||||
return *perfLog_;
|
||||
}
|
||||
|
||||
telemetry::Telemetry&
|
||||
getTelemetry() override
|
||||
{
|
||||
return *telemetry_;
|
||||
}
|
||||
|
||||
NodeCache&
|
||||
getTempNodeCache() override
|
||||
{
|
||||
@@ -1054,8 +1069,6 @@ public:
|
||||
<< "; size after: " << cachedSLEs_.size();
|
||||
}
|
||||
|
||||
mallocTrim("doSweep", m_journal);
|
||||
|
||||
// Set timer to do another sweep later.
|
||||
setSweepTimer();
|
||||
}
|
||||
@@ -1466,6 +1479,7 @@ ApplicationImp::start(bool withTimers)
|
||||
|
||||
ledgerCleaner_->start();
|
||||
perfLog_->start();
|
||||
telemetry_->start();
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1556,6 +1570,7 @@ ApplicationImp::run()
|
||||
ledgerCleaner_->stop();
|
||||
m_nodeStore->stop();
|
||||
perfLog_->stop();
|
||||
telemetry_->stop();
|
||||
|
||||
JLOG(m_journal.info()) << "Done.";
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <xrpld/rpc/Role.h>
|
||||
#include <xrpld/rpc/detail/Handler.h>
|
||||
#include <xrpld/rpc/detail/Tuning.h>
|
||||
#include <xrpld/telemetry/TracingInstrumentation.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/core/JobQueue.h>
|
||||
@@ -157,6 +158,11 @@ template <class Object, class Method>
|
||||
Status
|
||||
callMethod(JsonContext& context, Method method, std::string const& name, Object& result)
|
||||
{
|
||||
XRPL_TRACE_RPC(context.app.getTelemetry(), "rpc.command." + name);
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.command", name.c_str());
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.version", static_cast<int64_t>(context.apiVersion));
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.role", (context.role == Role::ADMIN ? "admin" : "user"));
|
||||
|
||||
static std::atomic<std::uint64_t> requestId{0};
|
||||
auto& perfLog = context.app.getPerfLog();
|
||||
std::uint64_t const curId = ++requestId;
|
||||
@@ -172,12 +178,15 @@ callMethod(JsonContext& context, Method method, std::string const& name, Object&
|
||||
JLOG(context.j.debug()) << "RPC call " << name << " completed in "
|
||||
<< ((end - start).count() / 1000000000.0) << "seconds";
|
||||
perfLog.rpcFinish(name, curId);
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");
|
||||
return ret;
|
||||
}
|
||||
catch (std::exception& e)
|
||||
{
|
||||
perfLog.rpcError(name, curId);
|
||||
JLOG(context.j.info()) << "Caught throw: " << e.what();
|
||||
XRPL_TRACE_EXCEPTION(e);
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "error");
|
||||
|
||||
if (context.loadType == Resource::feeReferenceRPC)
|
||||
context.loadType = Resource::feeExceptionRPC;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <xrpld/rpc/detail/Tuning.h>
|
||||
#include <xrpld/rpc/detail/WSInfoSub.h>
|
||||
#include <xrpld/rpc/json_body.h>
|
||||
#include <xrpld/telemetry/TracingInstrumentation.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
#include <xrpl/basics/base64.h>
|
||||
@@ -267,6 +268,8 @@ buffers_to_string(ConstBufferSequence const& bs)
|
||||
void
|
||||
ServerHandler::onRequest(Session& session)
|
||||
{
|
||||
XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.request");
|
||||
|
||||
// Make sure RPC is enabled on the port
|
||||
if (session.port().protocol.count("http") == 0 && session.port().protocol.count("https") == 0)
|
||||
{
|
||||
@@ -378,6 +381,7 @@ ServerHandler::processSession(
|
||||
std::shared_ptr<JobQueue::Coro> const& coro,
|
||||
Json::Value const& jv)
|
||||
{
|
||||
XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.ws_message");
|
||||
auto is = std::static_pointer_cast<WSInfoSub>(session->appDefined);
|
||||
if (is->getConsumer().disconnect(m_journal))
|
||||
{
|
||||
@@ -566,6 +570,7 @@ ServerHandler::processRequest(
|
||||
std::string_view forwardedFor,
|
||||
std::string_view user)
|
||||
{
|
||||
XRPL_TRACE_RPC(app_.getTelemetry(), "rpc.process");
|
||||
auto rpcJ = app_.journal("RPC");
|
||||
|
||||
Json::Value jsonOrig;
|
||||
|
||||
115
src/xrpld/telemetry/TracingInstrumentation.h
Normal file
115
src/xrpld/telemetry/TracingInstrumentation.h
Normal file
@@ -0,0 +1,115 @@
|
||||
#pragma once
|
||||
|
||||
/** Convenience macros for instrumenting code with OpenTelemetry trace spans.
|
||||
|
||||
When XRPL_ENABLE_TELEMETRY is defined, the macros create SpanGuard objects
|
||||
that manage span lifetime via RAII. When not defined, all macros expand to
|
||||
((void)0) with zero overhead.
|
||||
|
||||
Usage in instrumented code:
|
||||
@code
|
||||
XRPL_TRACE_RPC(app.getTelemetry(), "rpc.command." + name);
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.command", name);
|
||||
XRPL_TRACE_SET_ATTR("xrpl.rpc.status", "success");
|
||||
@endcode
|
||||
|
||||
@note Macro parameter names use leading/trailing underscores
|
||||
(e.g. _tel_obj_) to avoid colliding with identifiers in the macro body,
|
||||
specifically the ::xrpl::telemetry:: namespace qualifier.
|
||||
*/
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#include <optional>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
/** Start an unconditional span, ended when the guard goes out of scope.
|
||||
@param _tel_obj_ Telemetry instance reference.
|
||||
@param _span_name_ Span name string.
|
||||
*/
|
||||
#define XRPL_TRACE_SPAN(_tel_obj_, _span_name_) \
|
||||
auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_); \
|
||||
::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
|
||||
|
||||
/** Start an unconditional span with a specific SpanKind.
|
||||
@param _tel_obj_ Telemetry instance reference.
|
||||
@param _span_name_ Span name string.
|
||||
@param _span_kind_ opentelemetry::trace::SpanKind value.
|
||||
*/
|
||||
#define XRPL_TRACE_SPAN_KIND(_tel_obj_, _span_name_, _span_kind_) \
|
||||
auto _xrpl_span_ = (_tel_obj_).startSpan(_span_name_, _span_kind_); \
|
||||
::xrpl::telemetry::SpanGuard _xrpl_guard_(_xrpl_span_)
|
||||
|
||||
/** Conditionally start a span for RPC tracing.
|
||||
The span is only created if shouldTraceRpc() returns true.
|
||||
@param _tel_obj_ Telemetry instance reference.
|
||||
@param _span_name_ Span name string.
|
||||
*/
|
||||
#define XRPL_TRACE_RPC(_tel_obj_, _span_name_) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((_tel_obj_).shouldTraceRpc()) \
|
||||
{ \
|
||||
_xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \
|
||||
}
|
||||
|
||||
/** Conditionally start a span for transaction tracing.
|
||||
The span is only created if shouldTraceTransactions() returns true.
|
||||
@param _tel_obj_ Telemetry instance reference.
|
||||
@param _span_name_ Span name string.
|
||||
*/
|
||||
#define XRPL_TRACE_TX(_tel_obj_, _span_name_) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((_tel_obj_).shouldTraceTransactions()) \
|
||||
{ \
|
||||
_xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \
|
||||
}
|
||||
|
||||
/** Conditionally start a span for consensus tracing.
|
||||
The span is only created if shouldTraceConsensus() returns true.
|
||||
@param _tel_obj_ Telemetry instance reference.
|
||||
@param _span_name_ Span name string.
|
||||
*/
|
||||
#define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_) \
|
||||
std::optional<::xrpl::telemetry::SpanGuard> _xrpl_guard_; \
|
||||
if ((_tel_obj_).shouldTraceConsensus()) \
|
||||
{ \
|
||||
_xrpl_guard_.emplace((_tel_obj_).startSpan(_span_name_)); \
|
||||
}
|
||||
|
||||
/** Set a key-value attribute on the current span (if it exists).
|
||||
Must be used after one of the XRPL_TRACE_* span macros.
|
||||
*/
|
||||
#define XRPL_TRACE_SET_ATTR(key, value) \
|
||||
if (_xrpl_guard_.has_value()) \
|
||||
{ \
|
||||
_xrpl_guard_->setAttribute(key, value); \
|
||||
}
|
||||
|
||||
/** Record an exception on the current span and mark it as error.
|
||||
Must be used after one of the XRPL_TRACE_* span macros.
|
||||
*/
|
||||
#define XRPL_TRACE_EXCEPTION(e) \
|
||||
if (_xrpl_guard_.has_value()) \
|
||||
{ \
|
||||
_xrpl_guard_->recordException(e); \
|
||||
}
|
||||
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
|
||||
#else // XRPL_ENABLE_TELEMETRY not defined
|
||||
|
||||
#define XRPL_TRACE_SPAN(_tel_obj_, _span_name_) ((void)0)
|
||||
#define XRPL_TRACE_SPAN_KIND(_tel_obj_, _span_name_, _span_kind_) ((void)0)
|
||||
#define XRPL_TRACE_RPC(_tel_obj_, _span_name_) ((void)0)
|
||||
#define XRPL_TRACE_TX(_tel_obj_, _span_name_) ((void)0)
|
||||
#define XRPL_TRACE_CONSENSUS(_tel_obj_, _span_name_) ((void)0)
|
||||
#define XRPL_TRACE_SET_ATTR(key, value) ((void)0)
|
||||
#define XRPL_TRACE_EXCEPTION(e) ((void)0)
|
||||
|
||||
#endif // XRPL_ENABLE_TELEMETRY
|
||||
Reference in New Issue
Block a user