mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-06 10:16:45 +00:00
fix(telemetry): address review findings and PR #6437 comments
Critical fixes: - Restore accidentally removed mallocTrim call and MallocTrim.h include - Add missing shouldTraceLedger() to interface and all implementations - Derive networkId/networkType from config_->NETWORK_ID (0=mainnet, 1=testnet, 2=devnet) instead of leaving defaults unpopulated - Clamp sampling_ratio to [0.0, 1.0] in config parser PR comment fixes: - Rename rippled -> xrpld in service name defaults, getTracer() calls, Docker network, comments, and docs/build/telemetry.md - Remove exporter config option (only otlp_http supported) - Add trace_ledger and service_name to example config - Clarify head-based sampling semantics in config comments - Add filter descriptions for span intrinsic filters in Grafana datasource - Add inline comments to Docker Compose services Docker/config improvements: - Remove deprecated version: "3.8" from docker-compose.yml - Pin images: collector 0.121.0, grafana 11.5.2 - Add health_check extension to otel-collector-config.yaml - Comment out Tempo metrics_generator remote_write (no Prometheus service) - Add Prometheus datasource caveat in Grafana datasource config Other: - Revert unrelated formatting changes in ServiceRegistry.h - Change Conan telemetry default to False (matches CMake OFF) - Add CLAUDE.md-required docs (ASCII diagrams, usage examples, @note thread-safety) to Telemetry.h and SpanGuard.h Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -76,6 +76,12 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceLedger() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef XRPL_ENABLE_TELEMETRY
|
||||
opentelemetry::nostd::shared_ptr<opentelemetry::trace::Tracer>
|
||||
getTracer(std::string_view) override
|
||||
|
||||
@@ -93,6 +93,12 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceLedger() const override
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>
|
||||
getTracer(std::string_view) override
|
||||
{
|
||||
@@ -241,6 +247,12 @@ public:
|
||||
return setup_.tracePeer;
|
||||
}
|
||||
|
||||
bool
|
||||
shouldTraceLedger() const override
|
||||
{
|
||||
return setup_.traceLedger;
|
||||
}
|
||||
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Tracer>
|
||||
getTracer(std::string_view name) override
|
||||
{
|
||||
@@ -252,7 +264,7 @@ public:
|
||||
opentelemetry::nostd::shared_ptr<trace_api::Span>
|
||||
startSpan(std::string_view name, trace_api::SpanKind kind) override
|
||||
{
|
||||
auto tracer = getTracer("rippled");
|
||||
auto tracer = getTracer("xrpld");
|
||||
trace_api::StartSpanOptions opts;
|
||||
opts.kind = kind;
|
||||
return tracer->StartSpan(std::string(name), opts);
|
||||
@@ -264,7 +276,7 @@ public:
|
||||
opentelemetry::context::Context const& parentContext,
|
||||
trace_api::SpanKind kind) override
|
||||
{
|
||||
auto tracer = getTracer("rippled");
|
||||
auto tracer = getTracer("xrpld");
|
||||
trace_api::StartSpanOptions opts;
|
||||
opts.kind = kind;
|
||||
opts.parent = parentContext;
|
||||
|
||||
@@ -9,23 +9,49 @@
|
||||
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
|
||||
namespace {
|
||||
|
||||
/** Derive a human-readable network type label from the numeric network ID.
|
||||
@param networkId The network identifier from [network_id] config.
|
||||
@return "mainnet", "testnet", "devnet", or "unknown" for other values.
|
||||
*/
|
||||
std::string
|
||||
networkTypeFromId(std::uint32_t networkId)
|
||||
{
|
||||
switch (networkId)
|
||||
{
|
||||
case 0:
|
||||
return "mainnet";
|
||||
case 1:
|
||||
return "testnet";
|
||||
case 2:
|
||||
return "devnet";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
Telemetry::Setup
|
||||
setup_Telemetry(
|
||||
Section const& section,
|
||||
std::string const& nodePublicKey,
|
||||
std::string const& version)
|
||||
std::string const& version,
|
||||
std::uint32_t networkId)
|
||||
{
|
||||
Telemetry::Setup setup;
|
||||
|
||||
setup.enabled = section.value_or<int>("enabled", 0) != 0;
|
||||
setup.serviceName = section.value_or<std::string>("service_name", "rippled");
|
||||
setup.serviceName = section.value_or<std::string>("service_name", "xrpld");
|
||||
setup.serviceVersion = version;
|
||||
setup.serviceInstanceId = section.value_or<std::string>("service_instance_id", nodePublicKey);
|
||||
|
||||
setup.exporterType = section.value_or<std::string>("exporter", "otlp_http");
|
||||
setup.exporterEndpoint =
|
||||
section.value_or<std::string>("endpoint", "http://localhost:4318/v1/traces");
|
||||
|
||||
@@ -33,12 +59,16 @@ setup_Telemetry(
|
||||
setup.tlsCertPath = section.value_or<std::string>("tls_ca_cert", "");
|
||||
|
||||
setup.samplingRatio = section.value_or<double>("sampling_ratio", 1.0);
|
||||
setup.samplingRatio = std::clamp(setup.samplingRatio, 0.0, 1.0);
|
||||
|
||||
setup.batchSize = section.value_or<std::uint32_t>("batch_size", 512u);
|
||||
setup.batchDelay =
|
||||
std::chrono::milliseconds{section.value_or<std::uint32_t>("batch_delay_ms", 5000u)};
|
||||
setup.maxQueueSize = section.value_or<std::uint32_t>("max_queue_size", 2048u);
|
||||
|
||||
setup.networkId = networkId;
|
||||
setup.networkType = networkTypeFromId(networkId);
|
||||
|
||||
setup.traceTransactions = section.value_or<int>("trace_transactions", 1) != 0;
|
||||
setup.traceConsensus = section.value_or<int>("trace_consensus", 1) != 0;
|
||||
setup.traceRpc = section.value_or<int>("trace_rpc", 1) != 0;
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include <xrpld/shamap/NodeFamily.h>
|
||||
|
||||
#include <xrpl/basics/ByteUtilities.h>
|
||||
#include <xrpl/basics/MallocTrim.h>
|
||||
#include <xrpl/basics/ResolverAsio.h>
|
||||
#include <xrpl/basics/random.h>
|
||||
#include <xrpl/beast/asio/io_latency_probe.h>
|
||||
@@ -265,7 +266,8 @@ public:
|
||||
telemetry::setup_Telemetry(
|
||||
config_->section("telemetry"),
|
||||
"", // Updated later via setServiceInstanceId()
|
||||
BuildInfo::getVersionString()),
|
||||
BuildInfo::getVersionString(),
|
||||
config_->NETWORK_ID),
|
||||
logs_->journal("Telemetry")))
|
||||
|
||||
, m_txMaster(*this)
|
||||
@@ -1075,6 +1077,8 @@ public:
|
||||
<< "; size after: " << cachedSLEs_.size();
|
||||
}
|
||||
|
||||
mallocTrim("doSweep", m_journal);
|
||||
|
||||
// Set timer to do another sweep later.
|
||||
setSweepTimer();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user