mirror of
https://github.com/XRPLF/rippled.git
synced 2026-06-04 17:27:00 +00:00
feat(telemetry): add ledger.acquire span for inbound ledger fetch
InboundLedger drives ledger back-fill and fork recovery with timeout/retry logic (kLedgerTimeoutRetriesMax = 6), but emitted only a global ledger_fetches counter — sync/recovery cost was a telemetry blind spot.

Add a ledger.acquire span that wraps the acquisition lifecycle:

- Started in InboundLedger::init() with ledger_seq and acquire_reason (history / consensus / generic, mirroring InboundLedger::Reason).
- Finalized in InboundLedger::done() with outcome (complete / failed), timeouts, and peer_count, then reset so the span duration is exported.

Held as a std::optional<SpanGuard> member (same pattern as RCLConsensus roundSpan_). New op/attr/val constants added to LedgerSpanNames.h. When telemetry is disabled, the span compiles to a no-op via the SpanGuard fallback.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -6,8 +6,10 @@
|
||||
|
||||
#include <xrpl/basics/CountedObject.h>
|
||||
#include <xrpl/ledger/Ledger.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
|
||||
#include <mutex>
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <utility>
|
||||
|
||||
@@ -170,6 +172,12 @@ private:
|
||||
receivedData_;
|
||||
bool receiveDispatched_{false};
|
||||
std::unique_ptr<PeerSet> peerSet_;
|
||||
|
||||
/// Spans the acquire lifecycle: started in init(), finalized in done()
|
||||
/// with the outcome (complete/failed), timeout count, and peer count.
|
||||
/// Gives operators visibility into back-fill / fork-recovery cost, which
|
||||
/// previously emitted no span or metric.
|
||||
std::optional<telemetry::SpanGuard> acquireSpan_;
|
||||
};
|
||||
|
||||
} // namespace xrpl
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include <xrpld/app/ledger/InboundLedgers.h>
|
||||
#include <xrpld/app/ledger/LedgerMaster.h>
|
||||
#include <xrpld/app/ledger/TransactionStateSF.h>
|
||||
#include <xrpld/app/ledger/detail/LedgerSpanNames.h>
|
||||
#include <xrpld/app/ledger/detail/TimeoutCounter.h>
|
||||
#include <xrpld/app/main/Application.h>
|
||||
#include <xrpld/overlay/Message.h>
|
||||
@@ -30,6 +31,8 @@
|
||||
#include <xrpl/resource/Fees.h>
|
||||
#include <xrpl/shamap/SHAMapNodeID.h>
|
||||
#include <xrpl/shamap/SHAMapSyncFilter.h>
|
||||
#include <xrpl/telemetry/SpanGuard.h>
|
||||
#include <xrpl/telemetry/SpanNames.h>
|
||||
|
||||
#include <boost/iterator/function_output_iterator.hpp>
|
||||
|
||||
@@ -46,6 +49,7 @@
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
@@ -95,6 +99,23 @@ InboundLedger::init(ScopedLockType& collectionLock)
|
||||
ScopedLockType sl(mtx_);
|
||||
collectionLock.unlock();
|
||||
|
||||
// Span the acquire lifecycle so back-fill / fork-recovery cost is
|
||||
// observable. Finalized in done() with the outcome and timeout count.
|
||||
{
|
||||
using namespace telemetry;
|
||||
acquireSpan_.emplace(
|
||||
SpanGuard::span(TraceCategory::Ledger, seg::ledger, ledger_span::op::acquire));
|
||||
if (*acquireSpan_)
|
||||
{
|
||||
acquireSpan_->setAttribute(ledger_span::attr::ledgerSeq, static_cast<int64_t>(seq_));
|
||||
std::string_view const reasonVal = reason_ == Reason::HISTORY
|
||||
? std::string_view(ledger_span::val::history)
|
||||
: reason_ == Reason::CONSENSUS ? std::string_view(ledger_span::val::consensus)
|
||||
: std::string_view(ledger_span::val::generic);
|
||||
acquireSpan_->setAttribute(ledger_span::attr::acquireReason, reasonVal);
|
||||
}
|
||||
}
|
||||
|
||||
tryDB(app_.getNodeFamily().db());
|
||||
if (failed_)
|
||||
return;
|
||||
@@ -416,6 +437,21 @@ InboundLedger::done()
|
||||
signaled_ = true;
|
||||
touch();
|
||||
|
||||
// Finalize the acquire span with the outcome, timeout count, and peer
|
||||
// count, then end it (reset) so its duration is exported.
|
||||
if (acquireSpan_ && *acquireSpan_)
|
||||
{
|
||||
using namespace telemetry;
|
||||
acquireSpan_->setAttribute(
|
||||
ledger_span::attr::outcome,
|
||||
failed_ ? std::string_view(ledger_span::val::failed)
|
||||
: std::string_view(ledger_span::val::complete));
|
||||
acquireSpan_->setAttribute(ledger_span::attr::timeouts, static_cast<int64_t>(timeouts_));
|
||||
acquireSpan_->setAttribute(
|
||||
ledger_span::attr::peerCount, static_cast<int64_t>(getPeerCount()));
|
||||
}
|
||||
acquireSpan_.reset();
|
||||
|
||||
JLOG(journal_.debug()) << "Acquire " << hash_ << (failed_ ? " fail " : " ")
|
||||
<< ((timeouts_ == 0)
|
||||
? std::string()
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
* ledger.build (BuildLedger — ledger construction)
|
||||
* ledger.store (LedgerMaster — ledger storage)
|
||||
* ledger.validate (LedgerMaster — ledger validation acceptance)
|
||||
* ledger.acquire (InboundLedger — fetch a missing ledger from peers)
|
||||
* tx.apply (BuildLedger — transaction application)
|
||||
*/
|
||||
|
||||
@@ -24,6 +25,7 @@ inline constexpr auto build = makeStr("build");
|
||||
inline constexpr auto store = makeStr("store");
|
||||
inline constexpr auto validate = makeStr("validate");
|
||||
inline constexpr auto apply = makeStr("apply");
|
||||
inline constexpr auto acquire = makeStr("acquire");
|
||||
} // namespace op
|
||||
|
||||
// ===== Attribute keys ========================================================
|
||||
@@ -40,6 +42,24 @@ using ::xrpl::telemetry::attr::ledgerSeq;
|
||||
inline constexpr auto txCount = makeStr("tx_count");
|
||||
inline constexpr auto txFailed = makeStr("tx_failed");
|
||||
inline constexpr auto validations = makeStr("validations");
|
||||
|
||||
/// ledger.acquire attrs (InboundLedger fetch lifecycle).
|
||||
inline constexpr auto acquireReason = makeStr("acquire_reason");
|
||||
inline constexpr auto timeouts = makeStr("timeouts");
|
||||
inline constexpr auto peerCount = makeStr("peer_count");
|
||||
inline constexpr auto outcome = makeStr("outcome");
|
||||
} // namespace attr
|
||||
|
||||
// ===== Attribute values ======================================================
|
||||
|
||||
namespace val {
|
||||
/// ledger.acquire outcome values.
|
||||
inline constexpr auto complete = makeStr("complete");
|
||||
inline constexpr auto failed = makeStr("failed");
|
||||
/// ledger.acquire reason values (mirror InboundLedger::Reason).
|
||||
inline constexpr auto history = makeStr("history");
|
||||
inline constexpr auto consensus = makeStr("consensus");
|
||||
inline constexpr auto generic = makeStr("generic");
|
||||
} // namespace val
|
||||
|
||||
} // namespace xrpl::telemetry::ledger_span
|
||||
|
||||
Reference in New Issue
Block a user