mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-04 01:06:48 +00:00
feat(telemetry): add Phase 6 StatsD metrics, ledger/peer spans, and expanded dashboards
Integrate the existing StatsD metrics pipeline (beast::insight) into the OpenTelemetry observability stack and add new trace spans for ledger build/store/validate and peer proposal/validation receive.

Phase 5b — Ledger, peer, and transaction spans:
- Add ledger.build span with close time attributes in BuildLedger.cpp
- Add tx.apply span with tx_count/tx_failed in BuildLedger.cpp
- Add ledger.store and ledger.validate spans in LedgerMaster.cpp
- Add peer.proposal.receive span with trusted attribute in PeerImp.cpp
- Add peer.validation.receive span with ledger_hash, full, trusted attributes in PeerImp.cpp
- Add ledger-operations and peer-network Grafana dashboards

Phase 6 — StatsD metrics integration:
- Add StatsD UDP receiver (port 8125) to OTel Collector
- Add 5 StatsD Grafana dashboards: node health, network traffic, overlay traffic detail, ledger data sync, RPC pathfinding
- Add 09-data-collection-reference.md cataloging all metrics/spans
- Update existing dashboards with new span panels
- Expand telemetry runbook and integration test script
- Add codecov exclusions for telemetry modules

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
|
||||
#include <xrpld/app/ledger/LedgerReplay.h>
|
||||
#include <xrpld/app/ledger/OpenLedger.h>
|
||||
#include <xrpld/app/ledger/detail/LedgerSpanNames.h>
|
||||
#include <xrpld/app/main/Application.h>
|
||||
|
||||
#include <xrpl/basics/Log.h>
|
||||
@@ -13,8 +14,10 @@
|
||||
#include <xrpl/ledger/Ledger.h>
|
||||
#include <xrpl/ledger/OpenView.h>
|
||||
#include <xrpl/nodestore/NodeObject.h>
|
||||
#include <xrpl/protocol/Feature.h>
|
||||
#include <xrpl/protocol/LedgerHeader.h>
|
||||
#include <xrpl/protocol/Protocol.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
#include <xrpl/tx/apply.h>
|
||||
|
||||
#include <cstddef>
|
||||
@@ -41,6 +44,9 @@ buildLedgerImpl(
|
||||
beast::Journal j,
|
||||
ApplyTxs&& applyTxs)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto buildSpan = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::build);
|
||||
|
||||
auto built = std::make_shared<Ledger>(*parent, closeTime);
|
||||
|
||||
if (built->isFlagLedger())
|
||||
@@ -74,6 +80,14 @@ buildLedgerImpl(
|
||||
built->header().seq < XRP_LEDGER_EARLIEST_FEES || built->read(keylet::fees()),
|
||||
"xrpl::buildLedgerImpl : valid ledger fees");
|
||||
built->setAccepted(closeTime, closeResolution, closeTimeCorrect);
|
||||
buildSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(built->header().seq));
|
||||
buildSpan.setAttribute(
|
||||
ledger_span::attr::closeTime, static_cast<int64_t>(closeTime.time_since_epoch().count()));
|
||||
buildSpan.setAttribute(ledger_span::attr::closeTimeCorrect, closeTimeCorrect);
|
||||
buildSpan.setAttribute(
|
||||
ledger_span::attr::closeResolutionMs,
|
||||
static_cast<int64_t>(
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(closeResolution).count()));
|
||||
|
||||
return built;
|
||||
}
|
||||
@@ -97,6 +111,9 @@ applyTransactions(
|
||||
OpenView& view,
|
||||
beast::Journal j)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto applySpan = SpanGuard::span(TraceCategory::Transactions, seg::tx, ledger_span::op::apply);
|
||||
|
||||
bool certainRetry = true;
|
||||
std::size_t count = 0;
|
||||
|
||||
@@ -163,6 +180,8 @@ applyTransactions(
|
||||
// If there are any transactions left, we must have
|
||||
// tried them in at least one final pass
|
||||
XRPL_ASSERT(txns.empty() || !certainRetry, "xrpl::applyTransactions : retry transactions");
|
||||
applySpan.setAttribute(ledger_span::attr::txCount, static_cast<int64_t>(count));
|
||||
applySpan.setAttribute(ledger_span::attr::txFailed, static_cast<int64_t>(failed.size()));
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <xrpld/app/ledger/LedgerReplay.h>
|
||||
#include <xrpld/app/ledger/LedgerReplayer.h>
|
||||
#include <xrpld/app/ledger/OpenLedger.h>
|
||||
#include <xrpld/app/ledger/detail/LedgerSpanNames.h>
|
||||
#include <xrpld/app/main/Application.h>
|
||||
#include <xrpld/app/misc/SHAMapStore.h>
|
||||
#include <xrpld/app/misc/Transaction.h>
|
||||
@@ -55,6 +56,7 @@
|
||||
#include <xrpl/shamap/SHAMap.h>
|
||||
#include <xrpl/shamap/SHAMapMissingNode.h>
|
||||
#include <xrpl/shamap/SHAMapTreeNode.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
#include <boost/icl/concept/interval_set.hpp>
|
||||
|
||||
@@ -449,6 +451,10 @@ LedgerMaster::fixIndex(LedgerIndex ledgerIndex, LedgerHash const& ledgerHash)
|
||||
bool
|
||||
LedgerMaster::storeLedger(std::shared_ptr<Ledger const> ledger)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::store);
|
||||
span.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
|
||||
|
||||
bool const validated = ledger->header().validated;
|
||||
// Returns true if we already had the ledger
|
||||
return mLedgerHistory.insert(ledger, validated);
|
||||
@@ -965,6 +971,11 @@ LedgerMaster::checkAccept(std::shared_ptr<Ledger const> const& ledger)
|
||||
return;
|
||||
}
|
||||
|
||||
using namespace telemetry;
|
||||
auto valSpan = SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::validate);
|
||||
valSpan.setAttribute(ledger_span::attr::seq, static_cast<int64_t>(ledger->header().seq));
|
||||
valSpan.setAttribute(ledger_span::attr::validations, static_cast<int64_t>(tvc));
|
||||
|
||||
JLOG(m_journal.info()) << "Advancing accepted ledger to " << ledger->header().seq
|
||||
<< " with >= " << minVal << " validations";
|
||||
|
||||
|
||||
54
src/xrpld/app/ledger/detail/LedgerSpanNames.h
Normal file
54
src/xrpld/app/ledger/detail/LedgerSpanNames.h
Normal file
@@ -0,0 +1,54 @@
|
||||
#pragma once
|
||||
|
||||
/** Compile-time span name constants for ledger tracing.
|
||||
*
|
||||
* Used by BuildLedger and LedgerMaster for ledger lifecycle spans.
|
||||
* Built on StaticStr/join() from SpanNames.h.
|
||||
*
|
||||
* Span hierarchy:
|
||||
*
|
||||
* ledger.build (BuildLedger — ledger construction)
|
||||
* ledger.store (LedgerMaster — ledger storage)
|
||||
* ledger.validate (LedgerMaster — ledger validation acceptance)
|
||||
* tx.apply (BuildLedger — transaction application)
|
||||
*/
|
||||
|
||||
#include <xrpl/telemetry/SpanNames.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
namespace ledger_span {
|
||||
|
||||
// ===== Span operation suffixes ===============================================
|
||||
|
||||
namespace op {
|
||||
/// Suffix of the "ledger.build" span (BuildLedger — ledger construction).
inline constexpr auto build = makeStr("build");
|
||||
/// Suffix of the "ledger.store" span (LedgerMaster — ledger storage).
inline constexpr auto store = makeStr("store");
|
||||
/// Suffix of the "ledger.validate" span (LedgerMaster — ledger validation acceptance).
inline constexpr auto validate = makeStr("validate");
|
||||
/// Suffix of the "tx.apply" span (BuildLedger — transaction application).
/// Note: paired with the tx segment, not the ledger segment, by its caller.
inline constexpr auto apply = makeStr("apply");
|
||||
} // namespace op
|
||||
|
||||
// ===== Attribute keys ========================================================
|
||||
|
||||
namespace attr {
|
||||
/// Common prefix joined in front of every attribute key below
/// (presumably "xrpl.ledger", judging by the documented keys — confirm
/// against join() in SpanNames.h).
inline constexpr auto xrplLedger = join(seg::xrpl, seg::ledger);
|
||||
|
||||
/// "xrpl.ledger.seq"
/// Ledger sequence number; set on the ledger.build, ledger.store and
/// ledger.validate spans from header().seq.
|
||||
inline constexpr auto seq = join(xrplLedger, makeStr("seq"));
|
||||
/// "xrpl.ledger.close_time"
/// Raw count() of the close time's time_since_epoch(); the unit is that of
/// the close-time clock's duration type (not converted by the caller).
|
||||
inline constexpr auto closeTime = join(xrplLedger, makeStr("close_time"));
|
||||
/// "xrpl.ledger.close_time_correct"
/// Boolean: the closeTimeCorrect flag passed to setAccepted() during build.
|
||||
inline constexpr auto closeTimeCorrect = join(xrplLedger, makeStr("close_time_correct"));
|
||||
/// "xrpl.ledger.close_resolution_ms"
/// Close resolution after duration_cast to std::chrono::milliseconds.
|
||||
inline constexpr auto closeResolutionMs = join(xrplLedger, makeStr("close_resolution_ms"));
|
||||
/// "xrpl.ledger.tx_count"
/// The count that applyTransactions() returns; set on the tx.apply span.
|
||||
inline constexpr auto txCount = join(xrplLedger, makeStr("tx_count"));
|
||||
/// "xrpl.ledger.tx_failed"
/// Size of the failed set at the end of applyTransactions(); set on tx.apply.
|
||||
inline constexpr auto txFailed = join(xrplLedger, makeStr("tx_failed"));
|
||||
/// "xrpl.ledger.validations"
/// Value of `tvc` in LedgerMaster::checkAccept (presumably the trusted
/// validation count — confirm at the call site).
|
||||
inline constexpr auto validations = join(xrplLedger, makeStr("validations"));
|
||||
} // namespace attr
|
||||
|
||||
} // namespace ledger_span
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <xrpld/overlay/ReduceRelayCommon.h>
|
||||
#include <xrpld/overlay/detail/Handshake.h>
|
||||
#include <xrpld/overlay/detail/OverlayImpl.h>
|
||||
#include <xrpld/overlay/detail/PeerSpanNames.h>
|
||||
#include <xrpld/overlay/detail/ProtocolMessage.h>
|
||||
#include <xrpld/overlay/detail/ProtocolVersion.h>
|
||||
#include <xrpld/overlay/detail/TrafficCount.h>
|
||||
@@ -1863,6 +1864,10 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMLedgerData> const& m)
|
||||
void
|
||||
PeerImp::onMessage(std::shared_ptr<protocol::TMProposeSet> const& m)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto span = SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::proposalReceive);
|
||||
span.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
|
||||
|
||||
protocol::TMProposeSet const& set = *m;
|
||||
|
||||
auto const sig = makeSlice(set.signature());
|
||||
@@ -1889,6 +1894,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMProposeSet> const& m)
|
||||
// every time a spam packet is received
|
||||
PublicKey const publicKey{makeSlice(set.nodepubkey())};
|
||||
auto const isTrusted = app_.getValidators().trusted(publicKey);
|
||||
span.setAttribute(peer_span::attr::proposalTrusted, isTrusted);
|
||||
|
||||
// If the operator has specified that untrusted proposals be dropped then
|
||||
// this happens here, i.e. before further wasting CPU verifying the signature
|
||||
@@ -2459,6 +2465,11 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidatorListCollection> const& m
|
||||
void
|
||||
PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
|
||||
{
|
||||
using namespace telemetry;
|
||||
auto valSpan =
|
||||
SpanGuard::span(TraceCategory::Peer, seg::peer, peer_span::op::validationReceive);
|
||||
valSpan.setAttribute(peer_span::attr::id, static_cast<int64_t>(id_));
|
||||
|
||||
if (m->validation().size() < 50)
|
||||
{
|
||||
JLOG(p_journal_.warn()) << "Validation: Too small";
|
||||
@@ -2481,6 +2492,9 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
|
||||
false);
|
||||
val->setSeen(closeTime);
|
||||
}
|
||||
valSpan.setAttribute(
|
||||
peer_span::attr::validationLedgerHash, to_string(val->getLedgerHash()).c_str());
|
||||
valSpan.setAttribute(peer_span::attr::validationFull, val->isFull());
|
||||
|
||||
if (!isCurrent(
|
||||
app_.getValidations().parms(),
|
||||
@@ -2497,6 +2511,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
|
||||
// suppression for 30 seconds to avoid doing a relatively expensive
|
||||
// lookup every time a spam packet is received
|
||||
auto const isTrusted = app_.getValidators().trusted(val->getSignerPublic());
|
||||
valSpan.setAttribute(peer_span::attr::validationTrusted, isTrusted);
|
||||
|
||||
// If the operator has specified that untrusted validations be
|
||||
// dropped then this happens here, i.e. before further wasting CPU
|
||||
|
||||
50
src/xrpld/overlay/detail/PeerSpanNames.h
Normal file
50
src/xrpld/overlay/detail/PeerSpanNames.h
Normal file
@@ -0,0 +1,50 @@
|
||||
#pragma once
|
||||
|
||||
/** Compile-time span name constants for peer overlay tracing.
|
||||
*
|
||||
* Used by PeerImp for peer message handling spans (proposals,
|
||||
* validations). Built on StaticStr/join() from SpanNames.h.
|
||||
*
|
||||
* Span hierarchy:
|
||||
*
|
||||
* peer.proposal.receive (PeerImp — incoming proposal)
|
||||
* peer.validation.receive (PeerImp — incoming validation)
|
||||
*/
|
||||
|
||||
#include <xrpl/telemetry/SpanNames.h>
|
||||
|
||||
namespace xrpl {
|
||||
namespace telemetry {
|
||||
namespace peer_span {
|
||||
|
||||
// ===== Span operation suffixes ===============================================
|
||||
|
||||
namespace op {
|
||||
/// Suffix of the "peer.proposal.receive" span (PeerImp — incoming proposal).
inline constexpr auto proposalReceive = makeStr("proposal.receive");
|
||||
/// Suffix of the "peer.validation.receive" span (PeerImp — incoming validation).
inline constexpr auto validationReceive = makeStr("validation.receive");
|
||||
} // namespace op
|
||||
|
||||
// ===== Attribute keys ========================================================
|
||||
|
||||
namespace attr {
|
||||
/// Common prefix joined in front of every attribute key below
/// (presumably "xrpl.peer", judging by the documented keys — confirm
/// against join() in SpanNames.h).
inline constexpr auto xrplPeer = join(seg::xrpl, seg::peer);
|
||||
|
||||
/// "xrpl.peer.id"
/// The receiving PeerImp's id_; set on both the proposal and validation spans.
|
||||
inline constexpr auto id = join(xrplPeer, makeStr("id"));
|
||||
/// "xrpl.peer.proposal.trusted"
/// Boolean: result of getValidators().trusted() for the proposal's node public key.
|
||||
inline constexpr auto proposalTrusted =
|
||||
join(join(xrplPeer, makeStr("proposal")), makeStr("trusted"));
|
||||
|
||||
/// "xrpl.peer.validation.ledger_hash"
/// String form (to_string) of the validation's getLedgerHash().
|
||||
inline constexpr auto validationLedgerHash =
|
||||
join(join(xrplPeer, makeStr("validation")), makeStr("ledger_hash"));
|
||||
/// "xrpl.peer.validation.full"
/// Boolean: the validation's isFull() result.
|
||||
inline constexpr auto validationFull = join(join(xrplPeer, makeStr("validation")), makeStr("full"));
|
||||
/// "xrpl.peer.validation.trusted"
/// Boolean: result of getValidators().trusted() for the validation's signer public key.
|
||||
inline constexpr auto validationTrusted =
|
||||
join(join(xrplPeer, makeStr("validation")), makeStr("trusted"));
|
||||
} // namespace attr
|
||||
|
||||
} // namespace peer_span
|
||||
} // namespace telemetry
|
||||
} // namespace xrpl
|
||||
Reference in New Issue
Block a user