mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-10 12:16:49 +00:00
removed head sampling ratio from config
Signed-off-by: Pratik Mankawde <3397372+pratikmankawde@users.noreply.github.com>
This commit is contained in:
@@ -1648,14 +1648,13 @@ validators.txt
|
||||
# protobuf-encoded HTTP POST requests to this URL.
|
||||
# Default: http://localhost:4318/v1/traces.
|
||||
#
|
||||
# sampling_ratio=1.0
|
||||
#
|
||||
# Head-based sampling ratio using TraceIdRatioBasedSampler. The decision
|
||||
# to record or drop a trace is made at span creation time, before the
|
||||
# span starts, based on the trace ID. Values in [0.0, 1.0].
|
||||
# 1.0 = trace everything, 0.1 = sample ~10% of traces. Default: 1.0.
|
||||
# For tail-based (post-hoc) filtering — where you decide to drop a span
|
||||
# after inspecting its content — use SpanGuard::discard() in code.
|
||||
# Head sampling is intentionally fixed at 1.0 (sample everything) and is
|
||||
# not configurable. A per-node sampling ratio would let nodes make
|
||||
# divergent keep/drop decisions for the same distributed trace, producing
|
||||
# broken/partial traces. A ParentBasedSampler ensures spans inheriting a
|
||||
# remote parent honor the upstream decision. Reduce volume at the collector
|
||||
# via tail sampling instead; for node-local post-hoc dropping use
|
||||
# SpanGuard::discard() in code.
|
||||
#
|
||||
# trace_rpc=1
|
||||
#
|
||||
|
||||
@@ -42,23 +42,27 @@
|
||||
|
||||
Usage examples:
|
||||
|
||||
Span names and attribute keys come from per-module `*SpanNames.h`
|
||||
headers (e.g. RpcSpanNames.h, TxSpanNames.h) as typed compile-time
|
||||
constants — never raw string literals — so the naming spec is
|
||||
enforced at the call site and dashboards stay in sync.
|
||||
|
||||
1. Basic RPC tracing (factory method with category):
|
||||
@code
|
||||
// Define prefix at class level:
|
||||
static constexpr std::string_view spanPrefix_ = "rpc.command";
|
||||
#include <xrpld/rpc/detail/RpcSpanNames.h>
|
||||
using namespace xrpl::telemetry;
|
||||
|
||||
// At the call site:
|
||||
auto span = SpanGuard::span(
|
||||
TraceCategory::Rpc, spanPrefix_, "submit");
|
||||
span.setAttribute("xrpl.rpc.command", "submit");
|
||||
span.setAttribute("xrpl.rpc.status", "success");
|
||||
TraceCategory::Rpc, rpc_span::prefix::command, "submit");
|
||||
span.setAttribute(rpc_span::attr::command, "submit");
|
||||
span.setAttribute(rpc_span::attr::rpcStatus, rpc_span::val::success);
|
||||
// span ended automatically on scope exit
|
||||
@endcode
|
||||
|
||||
2. Error recording:
|
||||
@code
|
||||
auto span = SpanGuard::span(
|
||||
TraceCategory::Rpc, "rpc.command", "submit");
|
||||
TraceCategory::Rpc, rpc_span::prefix::command, "submit");
|
||||
try {
|
||||
doWork();
|
||||
span.setOk();
|
||||
@@ -69,29 +73,32 @@
|
||||
|
||||
3. Cross-thread context propagation:
|
||||
@code
|
||||
#include <xrpld/consensus/ConsensusSpanNames.h>
|
||||
using namespace xrpl::telemetry;
|
||||
|
||||
// Thread A: create span and capture context
|
||||
auto span = SpanGuard::span(
|
||||
TraceCategory::Consensus, "consensus", "round");
|
||||
TraceCategory::Consensus, seg::consensus, consensus::span::op::round);
|
||||
auto ctx = span.captureContext();
|
||||
|
||||
// Thread B: create child with captured context
|
||||
auto child = SpanGuard::childSpan("consensus.accept", ctx);
|
||||
auto child = SpanGuard::childSpan(consensus::span::accept, ctx);
|
||||
@endcode
|
||||
|
||||
4. Conditional check (rarely needed — methods are no-ops on null):
|
||||
@code
|
||||
auto span = SpanGuard::span(
|
||||
TraceCategory::Rpc, "rpc", "request");
|
||||
TraceCategory::Rpc, rpc_span::prefix::rpc, rpc_span::op::httpRequest);
|
||||
if (span) {
|
||||
// expensive attribute computation only when active
|
||||
span.setAttribute("xrpl.rpc.payload_size", computeSize());
|
||||
span.setAttribute(rpc_span::attr::requestPayloadSize, computeSize());
|
||||
}
|
||||
@endcode
|
||||
|
||||
5. Tail-based filtering via discard():
|
||||
@code
|
||||
auto span = SpanGuard::span(
|
||||
TraceCategory::Transactions, "tx", "process");
|
||||
TraceCategory::Transactions, tx_span::prefix::tx, tx_span::op::process);
|
||||
auto result = preflight(tx);
|
||||
if (result != tesSUCCESS) {
|
||||
span.discard(); // drop span, never exported
|
||||
|
||||
@@ -150,13 +150,16 @@ public:
|
||||
/** Path to a CA certificate bundle for TLS verification. */
|
||||
std::string tlsCertPath;
|
||||
|
||||
/** Head-based sampling ratio in [0.0, 1.0]. 1.0 = trace everything.
|
||||
This is a head-based (pre-decision) sampler using
|
||||
TraceIdRatioBasedSampler — the decision to record or drop a
|
||||
trace is made before the root span starts. For post-hoc
|
||||
(tail-based) filtering, see SpanGuard::discard().
|
||||
/** Head-based sampling ratio. Intentionally fixed at 1.0 (sample
|
||||
everything) and NOT read from config. A per-node ratio would let
|
||||
nodes make divergent keep/drop decisions for the same distributed
|
||||
trace, producing broken/partial traces. The ratio sampler is wrapped
|
||||
in a ParentBasedSampler (see Telemetry.cpp) so spans inheriting a
|
||||
remote parent honor the upstream sampled flag. Volume reduction is
|
||||
delegated to the collector's tail sampling; for node-local post-hoc
|
||||
dropping see SpanGuard::discard().
|
||||
*/
|
||||
double samplingRatio = 1.0;
|
||||
double const samplingRatio = 1.0;
|
||||
|
||||
/** Maximum number of spans per batch export. */
|
||||
std::uint32_t batchSize = 512;
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include <opentelemetry/sdk/trace/batch_span_processor_options.h>
|
||||
#include <opentelemetry/sdk/trace/processor.h>
|
||||
#include <opentelemetry/sdk/trace/sampler.h>
|
||||
#include <opentelemetry/sdk/trace/samplers/parent_factory.h>
|
||||
#include <opentelemetry/sdk/trace/samplers/trace_id_ratio.h>
|
||||
#include <opentelemetry/sdk/trace/tracer_provider.h>
|
||||
#include <opentelemetry/sdk/trace/tracer_provider_factory.h>
|
||||
@@ -299,8 +300,15 @@ public:
|
||||
{"xrpl.network.type", setup_.networkType},
|
||||
});
|
||||
|
||||
// Configure sampler
|
||||
auto sampler = std::make_unique<trace_sdk::TraceIdRatioBasedSampler>(setup_.samplingRatio);
|
||||
// Configure sampler. Head sampling is fixed at 1.0 (sample everything);
|
||||
// setup_.samplingRatio is not config-driven. Wrap the ratio sampler in a
|
||||
// ParentBasedSampler so spans with a remote parent honor the upstream
|
||||
// sampled flag — this keeps keep/drop decisions coherent for a single
|
||||
// distributed trace spanning multiple nodes. Volume reduction is left to
|
||||
// the collector's tail sampling.
|
||||
auto rootSampler =
|
||||
std::make_shared<trace_sdk::TraceIdRatioBasedSampler>(setup_.samplingRatio);
|
||||
auto sampler = trace_sdk::ParentBasedSamplerFactory::Create(std::move(rootSampler));
|
||||
|
||||
// Create TracerProvider
|
||||
sdkProvider_ = trace_sdk::TracerProviderFactory::Create(
|
||||
|
||||
@@ -8,9 +8,10 @@
|
||||
*/
|
||||
|
||||
#include <xrpl/basics/BasicConfig.h>
|
||||
#include <xrpl/protocol/SystemParameters.h>
|
||||
#include <xrpl/telemetry/Telemetry.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
@@ -18,6 +19,38 @@ namespace xrpl::telemetry {
|
||||
|
||||
namespace {
|
||||
|
||||
/** Config key names for the [telemetry] section.
|
||||
|
||||
Each must match the corresponding option documented in
|
||||
cfg/xrpld-example.cfg verbatim. Defined as `char const*` so they
|
||||
pass to Section::valueOr() (which takes `std::string const&`)
|
||||
without an explicit conversion, exactly as a literal would.
|
||||
*/
|
||||
namespace key {
|
||||
constexpr char const* enabled = "enabled";  // read as int; non-zero turns telemetry on (absent => 0)
|
||||
constexpr char const* serviceName = "service_name";  // string; falls back to systemName()
|
||||
constexpr char const* serviceInstanceId = "service_instance_id";  // string; falls back to nodePublicKey
|
||||
constexpr char const* endpoint = "endpoint";  // string; falls back to dflt::endpoint
|
||||
constexpr char const* useTls = "use_tls";  // read as int; non-zero enables TLS (absent => 0)
|
||||
constexpr char const* tlsCaCert = "tls_ca_cert";  // string stored in Setup::tlsCertPath; absent => ""
|
||||
constexpr char const* batchSize = "batch_size";  // std::uint32_t; falls back to dflt::batchSize
|
||||
constexpr char const* batchDelayMs = "batch_delay_ms";  // std::uint32_t milliseconds; falls back to dflt::batchDelayMs
|
||||
constexpr char const* maxQueueSize = "max_queue_size";  // std::uint32_t; falls back to dflt::maxQueueSize
|
||||
constexpr char const* traceTransactions = "trace_transactions";  // read as int; non-zero = on (absent => 1)
|
||||
constexpr char const* traceConsensus = "trace_consensus";  // read as int; non-zero = on (absent => 1)
|
||||
constexpr char const* traceRpc = "trace_rpc";  // read as int; non-zero = on (absent => 1)
|
||||
constexpr char const* tracePeer = "trace_peer";  // read as int; non-zero = on (absent => 1)
|
||||
constexpr char const* traceLedger = "trace_ledger";  // read as int; non-zero = on (absent => 1)
|
||||
} // namespace key
|
||||
|
||||
/** Default values applied when a key is absent from the config.

    These are the fallback arguments setupTelemetry() passes to
    Section::valueOr() for the matching `key::` entries. Only the
    non-trivial defaults are named here; the 0/1 flag defaults and the
    empty-string default are written inline at the call sites.
*/
|
||||
namespace dflt {
|
||||
constexpr char const* endpoint = "http://localhost:4318/v1/traces";  // same URL the example config documents as the default
|
||||
constexpr std::uint32_t batchSize = 512u;  // spans per batch export; same value as Setup::batchSize's initializer
|
||||
constexpr std::uint32_t batchDelayMs = 5000u;  // milliseconds; wrapped in std::chrono::milliseconds by the caller
|
||||
constexpr std::uint32_t maxQueueSize = 2048u;  // fallback for key::maxQueueSize
|
||||
} // namespace dflt
|
||||
|
||||
/** Derive a human-readable network type label from the numeric network ID.
|
||||
@param networkId The network identifier from [network_id] config.
|
||||
@return "mainnet", "testnet", "devnet", or "unknown" for other values.
|
||||
@@ -49,33 +82,35 @@ setupTelemetry(
|
||||
{
|
||||
Telemetry::Setup setup;
|
||||
|
||||
setup.enabled = section.valueOr<int>("enabled", 0) != 0;
|
||||
setup.serviceName = section.valueOr<std::string>("service_name", "xrpld");
|
||||
setup.enabled = section.valueOr<int>(key::enabled, 0) != 0;
|
||||
setup.serviceName = section.valueOr<std::string>(key::serviceName, systemName());
|
||||
setup.serviceVersion = version;
|
||||
setup.serviceInstanceId = section.valueOr<std::string>("service_instance_id", nodePublicKey);
|
||||
setup.serviceInstanceId = section.valueOr<std::string>(key::serviceInstanceId, nodePublicKey);
|
||||
|
||||
setup.exporterEndpoint =
|
||||
section.valueOr<std::string>("endpoint", "http://localhost:4318/v1/traces");
|
||||
setup.exporterEndpoint = section.valueOr<std::string>(key::endpoint, dflt::endpoint);
|
||||
|
||||
setup.useTls = section.valueOr<int>("use_tls", 0) != 0;
|
||||
setup.tlsCertPath = section.valueOr<std::string>("tls_ca_cert", "");
|
||||
setup.useTls = section.valueOr<int>(key::useTls, 0) != 0;
|
||||
setup.tlsCertPath = section.valueOr<std::string>(key::tlsCaCert, "");
|
||||
|
||||
setup.samplingRatio = section.valueOr<double>("sampling_ratio", 1.0);
|
||||
setup.samplingRatio = std::clamp(setup.samplingRatio, 0.0, 1.0);
|
||||
// Head sampling is intentionally fixed at 1.0 (sample everything) and is
|
||||
// not read from config. A per-node ratio would let nodes make divergent
|
||||
// keep/drop decisions for the same distributed trace, producing broken
|
||||
// traces; volume reduction is delegated to the collector's tail sampling.
|
||||
// setup.samplingRatio is a const member fixed at 1.0; nothing to parse.
|
||||
|
||||
setup.batchSize = section.valueOr<std::uint32_t>("batch_size", 512u);
|
||||
setup.batchDelay =
|
||||
std::chrono::milliseconds{section.valueOr<std::uint32_t>("batch_delay_ms", 5000u)};
|
||||
setup.maxQueueSize = section.valueOr<std::uint32_t>("max_queue_size", 2048u);
|
||||
setup.batchSize = section.valueOr<std::uint32_t>(key::batchSize, dflt::batchSize);
|
||||
setup.batchDelay = std::chrono::milliseconds{
|
||||
section.valueOr<std::uint32_t>(key::batchDelayMs, dflt::batchDelayMs)};
|
||||
setup.maxQueueSize = section.valueOr<std::uint32_t>(key::maxQueueSize, dflt::maxQueueSize);
|
||||
|
||||
setup.networkId = networkId;
|
||||
setup.networkType = networkTypeFromId(networkId);
|
||||
|
||||
setup.traceTransactions = section.valueOr<int>("trace_transactions", 1) != 0;
|
||||
setup.traceConsensus = section.valueOr<int>("trace_consensus", 1) != 0;
|
||||
setup.traceRpc = section.valueOr<int>("trace_rpc", 1) != 0;
|
||||
setup.tracePeer = section.valueOr<int>("trace_peer", 1) != 0;
|
||||
setup.traceLedger = section.valueOr<int>("trace_ledger", 1) != 0;
|
||||
setup.traceTransactions = section.valueOr<int>(key::traceTransactions, 1) != 0;
|
||||
setup.traceConsensus = section.valueOr<int>(key::traceConsensus, 1) != 0;
|
||||
setup.traceRpc = section.valueOr<int>(key::traceRpc, 1) != 0;
|
||||
setup.tracePeer = section.valueOr<int>(key::tracePeer, 1) != 0;
|
||||
setup.traceLedger = section.valueOr<int>(key::traceLedger, 1) != 0;
|
||||
|
||||
return setup;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user