From 3852b5ae4bf09b7b30df2ea912f1627ade7b46cf Mon Sep 17 00:00:00 2001 From: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com> Date: Thu, 16 Apr 2026 17:07:01 +0100 Subject: [PATCH] fix(telemetry): address review findings and PR #6437 comments Critical fixes: - Restore accidentally removed mallocTrim call and MallocTrim.h include - Add missing shouldTraceLedger() to interface and all implementations - Derive networkId/networkType from config_->NETWORK_ID (0=mainnet, 1=testnet, 2=devnet) instead of leaving defaults unpopulated - Clamp sampling_ratio to [0.0, 1.0] in config parser PR comment fixes: - Rename rippled -> xrpld in service name defaults, getTracer() calls, Docker network, comments, and docs/build/telemetry.md - Remove exporter config option (only otlp_http supported) - Add trace_ledger and service_name to example config - Clarify head-based sampling semantics in config comments - Add filter descriptions for span intrinsic filters in Grafana datasource - Add inline comments to Docker Compose services Docker/config improvements: - Remove deprecated version: "3.8" from docker-compose.yml - Pin images: collector 0.121.0, grafana 11.5.2 - Add health_check extension to otel-collector-config.yaml - Comment out Tempo metrics_generator remote_write (no Prometheus service) - Add Prometheus datasource caveat in Grafana datasource config Other: - Revert unrelated formatting changes in ServiceRegistry.h - Change Conan telemetry default to False (matches CMake OFF) - Add CLAUDE.md-required docs (ASCII diagrams, usage examples, @note thread-safety) to Telemetry.h and SpanGuard.h Co-Authored-By: Claude Opus 4.6 --- cfg/xrpld-example.cfg | 18 +++-- conanfile.py | 2 +- docker/telemetry/docker-compose.yml | 44 +++++++----- .../provisioning/datasources/tempo.yaml | 20 ++++-- docker/telemetry/otel-collector-config.yaml | 9 ++- docker/telemetry/tempo.yaml | 8 ++- docs/build/telemetry.md | 23 +++---- include/xrpl/core/ServiceRegistry.h | 21 +++--- include/xrpl/telemetry/SpanGuard.h | 55 +++++++++++++++ include/xrpl/telemetry/Telemetry.h | 69 +++++++++++++++++-- src/libxrpl/telemetry/NullTelemetry.cpp | 6 ++ src/libxrpl/telemetry/Telemetry.cpp | 16 ++++- src/libxrpl/telemetry/TelemetryConfig.cpp | 36 +++++++++- src/xrpld/app/main/Application.cpp | 5 +- 14 files changed, 263 insertions(+), 69 deletions(-) diff --git a/cfg/xrpld-example.cfg b/cfg/xrpld-example.cfg index f5d3a58019..eff308032c 100644 --- a/cfg/xrpld-example.cfg +++ b/cfg/xrpld-example.cfg @@ -1613,17 +1613,21 @@ validators.txt # # Enable or disable telemetry at runtime. Default: 0 (disabled). # +# service_name=xrpld +# +# OTel resource attribute `service.name`. Default: xrpld. +# The node's network ID (from [network_id]) is automatically added +# as the `xrpl.network.id` and `xrpl.network.type` resource attributes. +# # endpoint=http://localhost:4318/v1/traces # # The OpenTelemetry Collector endpoint (OTLP/HTTP). Default: http://localhost:4318/v1/traces. # -# exporter=otlp_http -# -# Exporter type: otlp_http. Default: otlp_http. -# # sampling_ratio=1.0 # -# Fraction of traces to sample (0.0 to 1.0). Default: 1.0 (all traces). +# Head-based sampling ratio: the fraction of traces to keep, decided at +# span creation time (before the trace completes). Values in [0.0, 1.0]. +# 1.0 = trace everything, 0.1 = sample ~10% of traces. Default: 1.0. # # trace_rpc=1 # @@ -1641,3 +1645,7 @@ validators.txt # # Enable peer message tracing (high volume). Default: 0. # +# trace_ledger=1 +# +# Enable ledger close/accept tracing. Default: 1. +# diff --git a/conanfile.py b/conanfile.py index c44abe47da..9630238ef6 100644 --- a/conanfile.py +++ b/conanfile.py @@ -55,7 +55,7 @@ class Xrpl(ConanFile): "rocksdb": True, "shared": False, "static": True, - "telemetry": True, + "telemetry": False, "tests": False, "unity": False, "xrpld": False, diff --git a/docker/telemetry/docker-compose.yml b/docker/telemetry/docker-compose.yml index b359cc5ce2..ce0f2f3a30 100644 --- a/docker/telemetry/docker-compose.yml +++ b/docker/telemetry/docker-compose.yml @@ -1,7 +1,7 @@ -# Docker Compose stack for rippled OpenTelemetry observability. +# Docker Compose stack for xrpld OpenTelemetry observability. # # Provides services for local development: -# - otel-collector: receives OTLP traces from rippled, batches and +# - otel-collector: receives OTLP traces from xrpld, batches and # forwards them to Tempo. Listens on ports 4317 (gRPC) # and 4318 (HTTP). # - tempo: Grafana Tempo tracing backend, queryable via Grafana Explore @@ -12,56 +12,64 @@ # Usage: # docker compose -f docker/telemetry/docker-compose.yml up -d # -# Configure rippled to export traces by adding to xrpld.cfg: +# Configure xrpld to export traces by adding to xrpld.cfg: # [telemetry] # enabled=1 # endpoint=http://localhost:4318/v1/traces -version: "3.8" - services: + # OpenTelemetry Collector: receives spans from xrpld via OTLP protocol, + # batches them for efficiency, and forwards to Tempo for storage. otel-collector: - image: otel/opentelemetry-collector-contrib:latest + image: otel/opentelemetry-collector-contrib:0.121.0 command: ["--config=/etc/otel-collector-config.yaml"] ports: - - "4317:4317" # OTLP gRPC - - "4318:4318" # OTLP HTTP - - "13133:13133" # Health check + - "4317:4317" # OTLP gRPC receiver + - "4318:4318" # OTLP HTTP receiver (xrpld sends traces here) + - "13133:13133" # Health check endpoint volumes: + # Mount collector pipeline config (receivers → processors → exporters) - ./otel-collector-config.yaml:/etc/otel-collector-config.yaml:ro depends_on: - tempo networks: - - rippled-telemetry + - xrpld-telemetry + # Grafana Tempo: distributed tracing backend that stores and indexes + # spans. Queryable via TraceQL in Grafana Explore. tempo: image: grafana/tempo:2.7.2 command: ["-config.file=/etc/tempo.yaml"] ports: - - "3200:3200" # Tempo HTTP API (health, query) + - "3200:3200" # Tempo HTTP API (health check, query) volumes: + # Mount Tempo storage and ingestion config - ./tempo.yaml:/etc/tempo.yaml:ro + # Persistent volume for trace data (WAL + blocks) - tempo-data:/var/tempo networks: - - rippled-telemetry + - xrpld-telemetry + # Grafana: visualization UI with Tempo pre-configured as a datasource. + # Anonymous admin access enabled for local development convenience. grafana: - image: grafana/grafana:latest + image: grafana/grafana:11.5.2 environment: - - GF_AUTH_ANONYMOUS_ENABLED=true - - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin + - GF_AUTH_ANONYMOUS_ENABLED=true # No login required for local dev + - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin # Full access without auth ports: - - "3000:3000" + - "3000:3000" # Grafana web UI volumes: + # Auto-provision Tempo datasource and search filters on startup - ./grafana/provisioning:/etc/grafana/provisioning:ro depends_on: - tempo networks: - - rippled-telemetry + - xrpld-telemetry volumes: tempo-data: networks: - rippled-telemetry: + xrpld-telemetry: driver: bridge diff --git a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml index 11b89458a8..825d55453c 100644 --- a/docker/telemetry/grafana/provisioning/datasources/tempo.yaml +++ b/docker/telemetry/grafana/provisioning/datasources/tempo.yaml @@ -1,7 +1,7 @@ # Grafana datasource provisioning for Grafana Tempo. # Auto-configures Tempo as a trace data source on Grafana startup. # Access Grafana at http://localhost:3000, then use Explore -> Tempo -# to browse rippled traces using TraceQL. +# to browse xrpld traces using TraceQL. # # Search filters provide pre-configured dropdowns in the Explore UI. # Each phase adds filters for the span attributes it introduces. @@ -18,6 +18,9 @@ datasources: jsonData: nodeGraph: enabled: true + # Service map and traces-to-metrics require a Prometheus datasource + # (not included in this stack). These features are inactive until a + # Prometheus service is added to docker-compose.yml. serviceMap: datasourceUid: prometheus tracesToMetrics: @@ -27,7 +30,7 @@ datasources: search: filters: # --- Node identification filters --- - # service.name: logical service name (default: "rippled"). + # service.name: logical service name (default: "xrpld"). # Useful when running multiple service types in the same collector. - id: service-name tag: service.name @@ -42,7 +45,7 @@ datasources: operator: "=" scope: resource type: static - # service.version: rippled build version (e.g., "2.4.0-b1"). + # service.version: xrpld build version (e.g., "2.4.0-b1"). # Filter traces from specific software releases. - id: node-version tag: service.version @@ -51,29 +54,36 @@ datasources: type: dynamic # xrpl.network.id: numeric network identifier # (0 = mainnet, 1 = testnet, 2 = devnet, etc.). + # Derived from the [network_id] config section. - id: network-id tag: xrpl.network.id operator: "=" scope: resource type: dynamic - # xrpl.network.type: human-readable network name - # ("mainnet", "testnet", "devnet", "standalone"). + # xrpl.network.type: human-readable network name derived from + # network ID ("mainnet", "testnet", "devnet", "unknown"). - id: network-type tag: xrpl.network.type operator: "=" scope: resource type: static # --- Span intrinsic filters --- + # name: the span operation name (e.g., "rpc.command.server_info"). + # Use to find traces for a specific RPC command or subsystem. - id: span-name tag: name operator: "=" scope: intrinsic type: static + # status: span completion status ("ok", "error", "unset"). + # Filter for failed operations to diagnose errors. - id: span-status tag: status operator: "=" scope: intrinsic type: static + # duration: span wall-clock duration. Use with ">" operator + # to find slow operations (e.g., duration > 500ms). - id: span-duration tag: duration operator: ">" diff --git a/docker/telemetry/otel-collector-config.yaml b/docker/telemetry/otel-collector-config.yaml index 4dc5aaa2f6..104f03dd7c 100644 --- a/docker/telemetry/otel-collector-config.yaml +++ b/docker/telemetry/otel-collector-config.yaml @@ -1,7 +1,7 @@ -# OpenTelemetry Collector configuration for rippled development. +# OpenTelemetry Collector configuration for xrpld development. # # Pipeline: OTLP receiver -> batch processor -> debug + Tempo. -# rippled sends traces via OTLP/HTTP to port 4318. The collector batches +# xrpld sends traces via OTLP/HTTP to port 4318. The collector batches # them and forwards to Tempo via OTLP/gRPC on the Docker network. Tempo # is queryable via Grafana Explore using TraceQL. @@ -26,7 +26,12 @@ exporters: tls: insecure: true +extensions: + health_check: + endpoint: 0.0.0.0:13133 + service: + extensions: [health_check] pipelines: traces: receivers: [otlp] diff --git a/docker/telemetry/tempo.yaml b/docker/telemetry/tempo.yaml index 824cc9fae9..7e56f60c6d 100644 --- a/docker/telemetry/tempo.yaml +++ b/docker/telemetry/tempo.yaml @@ -1,4 +1,4 @@ -# Grafana Tempo configuration for rippled telemetry stack. +# Grafana Tempo configuration for xrpld telemetry stack. # # Runs in single-binary mode for local development. # Receives traces via OTLP/gRPC from the OTel Collector and stores @@ -40,8 +40,10 @@ metrics_generator: source: tempo storage: path: /var/tempo/generator/wal - remote_write: - - url: http://prometheus:9090/api/v1/write + # Uncomment and add a Prometheus service to docker-compose.yml + # to enable remote_write for service graph metrics: + # remote_write: + # - url: http://prometheus:9090/api/v1/write overrides: defaults: diff --git a/docs/build/telemetry.md b/docs/build/telemetry.md index fce29ae719..f3e571fa16 100644 --- a/docs/build/telemetry.md +++ b/docs/build/telemetry.md @@ -1,8 +1,8 @@ -# OpenTelemetry Tracing for Rippled +# OpenTelemetry Tracing for xrpld -This document explains how to build rippled with OpenTelemetry distributed tracing support, configure the runtime telemetry options, and set up the observability backend to view traces. +This document explains how to build xrpld with OpenTelemetry distributed tracing support, configure the runtime telemetry options, and set up the observability backend to view traces. -- [OpenTelemetry Tracing for Rippled](#opentelemetry-tracing-for-rippled) +- [OpenTelemetry Tracing for xrpld](#opentelemetry-tracing-for-xrpld) - [Overview](#overview) - [Building with Telemetry](#building-with-telemetry) - [Summary](#summary) @@ -28,7 +28,7 @@ This document explains how to build rippled with OpenTelemetry distributed traci ## Overview -Rippled supports optional [OpenTelemetry](https://opentelemetry.io/) distributed tracing. +xrpld supports optional [OpenTelemetry](https://opentelemetry.io/) distributed tracing. When enabled, it instruments RPC requests with trace spans that are exported via OTLP/HTTP to an OpenTelemetry Collector, which forwards them to a tracing backend such as Grafana Tempo. @@ -55,7 +55,7 @@ Follow the same instructions as mentioned in [BUILD.md](../../BUILD.md) but with ### Build steps ```bash -cd /path/to/rippled +cd /path/to/xrpld rm -rf .build mkdir .build cd .build @@ -119,13 +119,13 @@ Add a `[telemetry]` section to your `xrpld.cfg` file: ```ini [telemetry] enabled=1 -service_name=rippled endpoint=http://localhost:4318/v1/traces sampling_ratio=1.0 trace_rpc=1 trace_transactions=1 trace_consensus=1 trace_peer=0 +trace_ledger=1 ``` ### Configuration options @@ -133,13 +133,12 @@ trace_peer=0 | Option | Type | Default | Description | | --------------------- | ------ | --------------------------------- | -------------------------------------------------- | | `enabled` | int | `0` | Enable (`1`) or disable (`0`) telemetry at runtime | -| `service_name` | string | `rippled` | Service name reported in traces | +| `service_name` | string | `xrpld` | Service name reported in traces | | `service_instance_id` | string | node public key | Unique instance identifier | -| `exporter` | string | `otlp_http` | Exporter type | | `endpoint` | string | `http://localhost:4318/v1/traces` | OTLP/HTTP collector endpoint | | `use_tls` | int | `0` | Enable TLS for the exporter connection | | `tls_ca_cert` | string | (empty) | Path to CA certificate for TLS | -| `sampling_ratio` | double | `1.0` | Fraction of traces to sample (`0.0` to `1.0`) | +| `sampling_ratio` | double | `1.0` | Head-based sampling ratio (`0.0` to `1.0`) | | `batch_size` | uint32 | `512` | Maximum spans per export batch | | `batch_delay_ms` | uint32 | `5000` | Maximum delay (ms) before flushing a batch | | `max_queue_size` | uint32 | `2048` | Maximum spans queued in memory | @@ -179,7 +178,7 @@ open http://localhost:3000 1. Open `http://localhost:3000` in a browser. 2. Navigate to **Explore** and select the **Tempo** datasource. -3. Use **Search** or **TraceQL** to find traces by service name (e.g. `rippled`). +3. Use **Search** or **TraceQL** to find traces by service name (e.g. `xrpld`). 4. Click into any trace to see the span tree and attributes. Traced RPC operations produce a span hierarchy like: @@ -210,7 +209,7 @@ silently drops spans with no impact on test results. ./xrpld --unittest --unittest-jobs $(nproc) ``` -To generate traces during manual testing, start rippled in standalone mode: +To generate traces during manual testing, start xrpld in standalone mode: ```bash ./xrpld --conf /path/to/xrpld.cfg --standalone --start @@ -230,7 +229,7 @@ curl -s -X POST http://127.0.0.1:5005/ \ 1. Confirm the OTel Collector is running: `docker compose -f docker/telemetry/docker-compose.yml ps` 2. Check collector logs for errors: `docker compose -f docker/telemetry/docker-compose.yml logs otel-collector` -3. Confirm `[telemetry] enabled=1` is set in the rippled config. +3. Confirm `[telemetry] enabled=1` is set in the xrpld config. 4. Confirm `endpoint` points to the correct collector address (`http://localhost:4318/v1/traces`). 5. Wait for the batch delay to elapse (default `5000` ms) before checking Grafana Explore. diff --git a/include/xrpl/core/ServiceRegistry.h b/include/xrpl/core/ServiceRegistry.h index 55f328cf45..019332baba 100644 --- a/include/xrpl/core/ServiceRegistry.h +++ b/include/xrpl/core/ServiceRegistry.h @@ -25,17 +25,18 @@ class Telemetry; // This is temporary until we migrate all code to use ServiceRegistry. class Application; -template +template < + class Key, + class T, + bool IsKeyCache, + class SharedWeakUnionPointer, + class SharedPointerType, + class Hash, + class KeyEqual, + class Mutex> class TaggedCache; class STLedgerEntry; -using SLE = STLedgerEntry; +using SLE = STLedgerEntry; using CachedSLEs = TaggedCache; // Forward declarations @@ -93,7 +94,7 @@ using NodeCache = TaggedCache; class ServiceRegistry { public: - ServiceRegistry() = default; + ServiceRegistry() = default; virtual ~ServiceRegistry() = default; // Core infrastructure services diff --git a/include/xrpl/telemetry/SpanGuard.h b/include/xrpl/telemetry/SpanGuard.h index 07ad8e9ae7..39ea99ff7a 100644 --- a/include/xrpl/telemetry/SpanGuard.h +++ b/include/xrpl/telemetry/SpanGuard.h @@ -6,10 +6,65 @@ activated on the current thread's context (via Scope). On destruction, the span is ended and the previous context is restored. + Dependency diagram: + + +------------------------------------+ + | SpanGuard | + +------------------------------------+ + | - span_ : shared_ptr | + | - scope_ : Scope | + +------------------------------------+ + | uses + +-------+-------+ + | | + +--------+ +-------------+ + | Span | | Scope | + | (OTel) | | (OTel, non- | + | | | movable) | + +--------+ +-------------+ + Used by the XRPL_TRACE_* macros in TracingInstrumentation.h. Can also be stored in std::optional for conditional tracing (move-constructible). Only compiled when XRPL_ENABLE_TELEMETRY is defined. + + Usage examples: + + 1. Basic RAII tracing: + @code + { + SpanGuard guard(telemetry.startSpan("rpc.command.submit")); + guard.setAttribute("xrpl.rpc.command", "submit"); + // ... span is active on this thread's context + } // span ended, previous context restored + @endcode + + 2. Conditional tracing with std::optional: + @code + std::optional guard; + if (telemetry.isEnabled() && telemetry.shouldTraceRpc()) + guard.emplace(telemetry.startSpan("rpc.request")); + // ... guard may or may not hold a span + @endcode + + 3. Error recording: + @code + SpanGuard guard(telemetry.startSpan("rpc.command.submit")); + try { + // ... do work + guard.setOk(); + } catch (std::exception const& e) { + guard.recordException(e); // sets status to error + } + @endcode + + @note Thread safety: A SpanGuard must only be used on the thread where + it was constructed (the Scope binds to the thread-local context stack). + Use context() to propagate the trace to other threads. + + @note Limitation: Move assignment is deleted because re-scoping a span + mid-flight would corrupt the context stack. Only move construction is + supported (for std::optional emplacement). */ #ifdef XRPL_ENABLE_TELEMETRY diff --git a/include/xrpl/telemetry/Telemetry.h b/include/xrpl/telemetry/Telemetry.h index c6febd5f84..0599e783ae 100644 --- a/include/xrpl/telemetry/Telemetry.h +++ b/include/xrpl/telemetry/Telemetry.h @@ -3,19 +3,70 @@ /** Abstract interface for OpenTelemetry distributed tracing. Provides the Telemetry base class that all components use to create trace - spans. Two implementations exist: + spans. Two concrete implementations exist, selected at construction time + by make_Telemetry(): - TelemetryImpl (Telemetry.cpp): real OTel SDK integration, compiled only when XRPL_ENABLE_TELEMETRY is defined and enabled at runtime. - NullTelemetry (NullTelemetry.cpp): no-op stub used when telemetry is disabled at compile time or runtime. + Inheritance / dependency diagram: + + +--------------------+ + | Telemetry | (abstract, this file) + | <> | + +---------+----------+ + | + +---------+-----------+-------------------+ + | | | + +---+------------+ +-----+---------+ +------+----------+ + | TelemetryImpl | | NullTelemetry | | NullTelemetryOtel| + | (Telemetry.cpp)| |(NullTelemetry | | (Telemetry.cpp) | + | OTel SDK | | .cpp) | | noop w/ OTel API | + +----------------+ +---------------+ +------------------+ + The Setup struct holds all configuration parsed from the [telemetry] section of xrpld.cfg. See TelemetryConfig.cpp for the parser and cfg/xrpld-example.cfg for the available options. OTel SDK headers are conditionally included behind XRPL_ENABLE_TELEMETRY so that builds without telemetry have zero dependency on opentelemetry-cpp. + + Usage examples: + + 1. Check before tracing (typical guard pattern): + @code + auto& telemetry = registry.getTelemetry(); + if (telemetry.isEnabled() && telemetry.shouldTraceRpc()) + { + auto span = telemetry.startSpan("rpc.command.server_info"); + // ... do work, span ends when shared_ptr refcount drops to 0 + } + @endcode + + 2. RAII tracing with SpanGuard (preferred): + @code + if (telemetry.isEnabled() && telemetry.shouldTraceRpc()) + { + SpanGuard guard(telemetry.startSpan("rpc.command.submit")); + guard.setAttribute("xrpl.rpc.command", "submit"); + // ... guard ends span automatically on scope exit + } + @endcode + + 3. Cross-thread context propagation: + @code + // On thread A: capture context + auto ctx = guard.context(); + // On thread B: create child span with explicit parent + auto child = telemetry.startSpan("async.work", ctx); + @endcode + + @note Thread safety: The Telemetry interface is safe for concurrent reads + (isEnabled, shouldTrace*, getTracer, startSpan) after start() completes. + setServiceInstanceId() must be called before start() and is not thread-safe. + The OTel SDK's TracerProvider and Tracer are internally thread-safe. */ #include @@ -50,7 +101,7 @@ public: bool enabled = false; /** OTel resource attribute `service.name`. */ - std::string serviceName = "rippled"; + std::string serviceName = "xrpld"; /** OTel resource attribute `service.version` (set from BuildInfo). */ std::string serviceVersion; @@ -59,9 +110,6 @@ public: public key). */ std::string serviceInstanceId; - /** Exporter type: currently only "otlp_http" is supported. */ - std::string exporterType = "otlp_http"; - /** OTLP/HTTP endpoint URL where spans are sent. */ std::string exporterEndpoint = "http://localhost:4318/v1/traces"; @@ -157,6 +205,10 @@ public: virtual bool shouldTracePeer() const = 0; + /** @return true if ledger close/accept should be traced. */ + virtual bool + shouldTraceLedger() const = 0; + #ifdef XRPL_ENABLE_TELEMETRY /** Get or create a named tracer instance. @@ -164,7 +216,7 @@ public: @return A shared pointer to the Tracer. */ virtual opentelemetry::nostd::shared_ptr - getTracer(std::string_view name = "rippled") = 0; + getTracer(std::string_view name = "xrpld") = 0; /** Start a new span on the current thread's context. @@ -214,13 +266,16 @@ make_Telemetry(Telemetry::Setup const& setup, beast::Journal journal); @param section The [telemetry] config section. @param nodePublicKey Node public key, used as default instance ID. @param version Build version string. + @param networkId Network identifier from [network_id] config + (0 = mainnet, 1 = testnet, 2 = devnet). @return A populated Setup struct with defaults for missing values. */ Telemetry::Setup setup_Telemetry( Section const& section, std::string const& nodePublicKey, - std::string const& version); + std::string const& version, + std::uint32_t networkId); } // namespace telemetry } // namespace xrpl diff --git a/src/libxrpl/telemetry/NullTelemetry.cpp b/src/libxrpl/telemetry/NullTelemetry.cpp index faa81590cb..64c8f5e491 100644 --- a/src/libxrpl/telemetry/NullTelemetry.cpp +++ b/src/libxrpl/telemetry/NullTelemetry.cpp @@ -76,6 +76,12 @@ public: return false; } + bool + shouldTraceLedger() const override + { + return false; + } + #ifdef XRPL_ENABLE_TELEMETRY opentelemetry::nostd::shared_ptr getTracer(std::string_view) override diff --git a/src/libxrpl/telemetry/Telemetry.cpp b/src/libxrpl/telemetry/Telemetry.cpp index 53b7f91655..1b9f2159b4 100644 --- a/src/libxrpl/telemetry/Telemetry.cpp +++ b/src/libxrpl/telemetry/Telemetry.cpp @@ -93,6 +93,12 @@ public: return false; } + bool + shouldTraceLedger() const override + { + return false; + } + opentelemetry::nostd::shared_ptr getTracer(std::string_view) override { @@ -241,6 +247,12 @@ public: return setup_.tracePeer; } + bool + shouldTraceLedger() const override + { + return setup_.traceLedger; + } + opentelemetry::nostd::shared_ptr getTracer(std::string_view name) override { @@ -252,7 +264,7 @@ public: opentelemetry::nostd::shared_ptr startSpan(std::string_view name, trace_api::SpanKind kind) override { - auto tracer = getTracer("rippled"); + auto tracer = getTracer("xrpld"); trace_api::StartSpanOptions opts; opts.kind = kind; return tracer->StartSpan(std::string(name), opts); @@ -264,7 +276,7 @@ public: opentelemetry::context::Context const& parentContext, trace_api::SpanKind kind) override { - auto tracer = getTracer("rippled"); + auto tracer = getTracer("xrpld"); trace_api::StartSpanOptions opts; opts.kind = kind; opts.parent = parentContext; diff --git a/src/libxrpl/telemetry/TelemetryConfig.cpp b/src/libxrpl/telemetry/TelemetryConfig.cpp index c5b25023e4..16a1461286 100644 --- a/src/libxrpl/telemetry/TelemetryConfig.cpp +++ b/src/libxrpl/telemetry/TelemetryConfig.cpp @@ -9,23 +9,49 @@ #include +#include + namespace xrpl { namespace telemetry { +namespace { + +/** Derive a human-readable network type label from the numeric network ID. + @param networkId The network identifier from [network_id] config. + @return "mainnet", "testnet", "devnet", or "unknown" for other values. +*/ +std::string +networkTypeFromId(std::uint32_t networkId) +{ + switch (networkId) + { + case 0: + return "mainnet"; + case 1: + return "testnet"; + case 2: + return "devnet"; + default: + return "unknown"; + } +} + +} // namespace + Telemetry::Setup setup_Telemetry( Section const& section, std::string const& nodePublicKey, - std::string const& version) + std::string const& version, + std::uint32_t networkId) { Telemetry::Setup setup; setup.enabled = section.value_or("enabled", 0) != 0; - setup.serviceName = section.value_or("service_name", "rippled"); + setup.serviceName = section.value_or("service_name", "xrpld"); setup.serviceVersion = version; setup.serviceInstanceId = section.value_or("service_instance_id", nodePublicKey); - setup.exporterType = section.value_or("exporter", "otlp_http"); setup.exporterEndpoint = section.value_or("endpoint", "http://localhost:4318/v1/traces"); @@ -33,12 +59,16 @@ setup_Telemetry( setup.tlsCertPath = section.value_or("tls_ca_cert", ""); setup.samplingRatio = section.value_or("sampling_ratio", 1.0); + setup.samplingRatio = std::clamp(setup.samplingRatio, 0.0, 1.0); setup.batchSize = section.value_or("batch_size", 512u); setup.batchDelay = std::chrono::milliseconds{section.value_or("batch_delay_ms", 5000u)}; setup.maxQueueSize = section.value_or("max_queue_size", 2048u); + setup.networkId = networkId; + setup.networkType = networkTypeFromId(networkId); + setup.traceTransactions = section.value_or("trace_transactions", 1) != 0; setup.traceConsensus = section.value_or("trace_consensus", 1) != 0; setup.traceRpc = section.value_or("trace_rpc", 1) != 0; diff --git a/src/xrpld/app/main/Application.cpp b/src/xrpld/app/main/Application.cpp index f34cfd3aff..f9a70725ec 100644 --- a/src/xrpld/app/main/Application.cpp +++ b/src/xrpld/app/main/Application.cpp @@ -326,7 +326,8 @@ public: telemetry::setup_Telemetry( config_->section("telemetry"), "", // Updated later via setServiceInstanceId() - BuildInfo::getVersionString()), + BuildInfo::getVersionString(), + config_->NETWORK_ID), logs_->journal("Telemetry"))) , m_txMaster(*this) @@ -1141,6 +1142,8 @@ public: << "; size after: " << cachedSLEs_.size(); } + mallocTrim("doSweep", m_journal); + // Set timer to do another sweep later. setSweepTimer(); }