refactor: Enforce 15-char limit and simplify labels for thread naming (#6212)

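This change continues the thread naming work from #5691 and #5758, which makes lock contention profiling more useful by ensuring threads/jobs have short, stable, human-readable names (rather than names that are truncated or rejected because of OS limits). It changes diagnostic naming only (thread names and job/load-event labels), not functional behavior; the one non-naming addition is an assertion in `beast::detail::setCurrentThreadNameImpl` that fires when a thread name exceeds the maximum length on Linux.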

Specific modifications are:
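* Shortens all thread/job names used with `beast::setCurrentThreadName`, so the effective Linux thread name stays within the 15-character limit. Because `Job::doJob` names its thread `"j:" + <job name>`, job names themselves must be at most 13 characters.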
* Removes per-ledger sequence numbers from job/thread names to avoid long labels. This improves aggregation in lock contention profiling for short-lived job executions.
This commit is contained in:
Valentin Balaschenko
2026-01-22 13:19:29 +00:00
committed by Ed Hennis
parent 415e3d8ca5
commit 9deae7eef8
20 changed files with 107 additions and 133 deletions

View File

@@ -22,6 +22,7 @@
//==============================================================================
#include <xrpl/beast/core/CurrentThreadName.h>
#include <xrpl/beast/utility/instrumentation.h>
#include <string>
#include <string_view>
@@ -118,6 +119,11 @@ setCurrentThreadNameImpl(std::string_view name)
std::cerr << "WARNING: Thread name \"" << name << "\" (length "
<< name.size() << ") exceeds maximum of "
<< maxThreadNameLength << " characters on Linux.\n";
XRPL_ASSERT(
false,
"beast::detail::setCurrentThreadNameImpl : Thread name exceeds "
"maximum length for Linux");
}
#endif
}

View File

@@ -107,20 +107,15 @@ public:
BEAST_EXPECT(stateB == 2);
}
#if BOOST_OS_LINUX
// On Linux, verify that thread names longer than 15 characters
// are truncated to 15 characters (the 16th character is reserved for
// the null terminator).
// On Linux, verify that thread names within the 15 character limit
// are set correctly (the 16th character is reserved for the null
// terminator).
{
testName(
"123456789012345",
"123456789012345"); // 15 chars, no truncation
testName(
"1234567890123456", "123456789012345"); // 16 chars, truncated
testName(
"ThisIsAVeryLongThreadNameExceedingLimit",
"ThisIsAVeryLong"); // 39 chars, truncated
"123456789012345"); // 15 chars, maximum allowed
testName("", ""); // empty name
testName("short", "short"); // short name, no truncation
testName("short", "short"); // short name
}
#endif
}

View File

@@ -75,7 +75,7 @@ public:
gate g1, g2;
std::shared_ptr<JobQueue::Coro> c;
env.app().getJobQueue().postCoro(
jtCLIENT, "Coroutine-Test", [&](auto const& cr) {
jtCLIENT, "CoroTest", [&](auto const& cr) {
c = cr;
g1.signal();
c->yield();
@@ -102,7 +102,7 @@ public:
gate g;
env.app().getJobQueue().postCoro(
jtCLIENT, "Coroutine-Test", [&](auto const& c) {
jtCLIENT, "CoroTest", [&](auto const& c) {
c->post();
c->yield();
g.signal();
@@ -128,7 +128,7 @@ public:
BEAST_EXPECT(*lv == -1);
gate g;
jq.addJob(jtCLIENT, "LocalValue-Test", [&]() {
jq.addJob(jtCLIENT, "LocalValTest", [&]() {
this->BEAST_EXPECT(*lv == -1);
*lv = -2;
this->BEAST_EXPECT(*lv == -2);
@@ -139,7 +139,7 @@ public:
for (int i = 0; i < N; ++i)
{
jq.postCoro(jtCLIENT, "Coroutine-Test", [&, id = i](auto const& c) {
jq.postCoro(jtCLIENT, "CoroTest", [&, id = i](auto const& c) {
a[id] = c;
g.signal();
c->yield();
@@ -167,7 +167,7 @@ public:
c->join();
}
jq.addJob(jtCLIENT, "LocalValue-Test", [&]() {
jq.addJob(jtCLIENT, "LocalValTest", [&]() {
this->BEAST_EXPECT(*lv == -2);
g.signal();
});

View File

@@ -138,9 +138,7 @@ RCLConsensus::Adaptor::acquireLedger(LedgerHash const& hash)
acquiringLedger_ = hash;
app_.getJobQueue().addJob(
jtADVANCE,
"getConsensusLedger1",
[id = hash, &app = app_, this]() {
jtADVANCE, "GetConsL1", [id = hash, &app = app_, this]() {
JLOG(j_.debug())
<< "JOB advanceLedger getConsensusLedger1 started";
app.getInboundLedgers().acquireAsync(
@@ -441,7 +439,7 @@ RCLConsensus::Adaptor::onAccept(
{
app_.getJobQueue().addJob(
jtACCEPT,
"acceptLedger",
"AcceptLedger",
[=, this, cj = std::move(consensusJson)]() mutable {
// Note that no lock is held or acquired during this job.
// This is because generic Consensus guarantees that once a ledger

View File

@@ -141,13 +141,11 @@ RCLValidationsAdaptor::acquire(LedgerHash const& hash)
Application* pApp = &app_;
app_.getJobQueue().addJob(
jtADVANCE, "getConsensusLedger2", [pApp, hash, this]() {
JLOG(j_.debug())
<< "JOB advanceLedger getConsensusLedger2 started";
pApp->getInboundLedgers().acquireAsync(
hash, 0, InboundLedger::Reason::CONSENSUS);
});
app_.getJobQueue().addJob(jtADVANCE, "GetConsL2", [pApp, hash, this]() {
JLOG(j_.debug()) << "JOB advanceLedger getConsensusLedger2 started";
pApp->getInboundLedgers().acquireAsync(
hash, 0, InboundLedger::Reason::CONSENSUS);
});
return std::nullopt;
}

View File

@@ -65,7 +65,7 @@ ConsensusTransSetSF::gotNode(
"ripple::ConsensusTransSetSF::gotNode : transaction hash "
"match");
auto const pap = &app_;
app_.getJobQueue().addJob(jtTRANSACTION, "TXS->TXN", [pap, stx]() {
app_.getJobQueue().addJob(jtTRANSACTION, "TxsToTxn", [pap, stx]() {
pap->getOPs().submitTransaction(stx);
});
}

View File

@@ -67,9 +67,9 @@ OrderBookDB::setup(std::shared_ptr<ReadView const> const& ledger)
update(ledger);
else
app_.getJobQueue().addJob(
jtUPDATE_PF,
"OrderBookDB::update: " + std::to_string(ledger->seq()),
[this, ledger]() { update(ledger); });
jtUPDATE_PF, "OrderBookUpd", [this, ledger]() {
update(ledger);
});
}
}

View File

@@ -473,7 +473,7 @@ InboundLedger::done()
// We hold the PeerSet lock, so must dispatch
app_.getJobQueue().addJob(
jtLEDGER_DATA, "AcquisitionDone", [self = shared_from_this()]() {
jtLEDGER_DATA, "AcqDone", [self = shared_from_this()]() {
if (self->complete_ && !self->failed_)
{
self->app_.getLedgerMaster().checkAccept(self->getLedger());

View File

@@ -212,7 +212,7 @@ public:
// dispatch
if (ledger->gotData(std::weak_ptr<Peer>(peer), packet))
app_.getJobQueue().addJob(
jtLEDGER_DATA, "processLedgerData", [ledger]() {
jtLEDGER_DATA, "ProcessLData", [ledger]() {
ledger->runData();
});
@@ -227,7 +227,7 @@ public:
if (packet->type() == protocol::liAS_NODE)
{
app_.getJobQueue().addJob(
jtLEDGER_DATA, "gotStaleData", [this, packet]() {
jtLEDGER_DATA, "GotStaleData", [this, packet]() {
gotStaleData(packet);
});
}

View File

@@ -39,7 +39,7 @@ LedgerDeltaAcquire::LedgerDeltaAcquire(
ledgerHash,
LedgerReplayParameters::SUB_TASK_TIMEOUT,
{jtREPLAY_TASK,
"LedgerReplayDelta",
"LedReplDelta",
LedgerReplayParameters::MAX_QUEUED_TASKS},
app.journal("LedgerReplayDelta"))
, inboundLedgers_(inboundLedgers)
@@ -243,7 +243,7 @@ LedgerDeltaAcquire::onLedgerBuilt(
}
app_.getJobQueue().addJob(
jtREPLAY_TASK,
"onLedgerBuilt",
"OnLedBuilt",
[=, ledger = this->fullLedger_, &app = this->app_]() {
for (auto reason : reasons)
{

View File

@@ -1363,7 +1363,7 @@ LedgerMaster::tryAdvance()
if (!mAdvanceThread && !mValidLedger.empty())
{
mAdvanceThread = true;
app_.getJobQueue().addJob(jtADVANCE, "advanceLedger", [this]() {
app_.getJobQueue().addJob(jtADVANCE, "AdvanceLedger", [this]() {
std::unique_lock sl(m_mutex);
XRPL_ASSERT(
@@ -1500,7 +1500,7 @@ bool
LedgerMaster::newPathRequest()
{
std::unique_lock ml(m_mutex);
mPathFindNewRequest = newPFWork("pf:newRequest", ml);
mPathFindNewRequest = newPFWork("PthFindNewReq", ml);
return mPathFindNewRequest;
}
@@ -1521,7 +1521,7 @@ LedgerMaster::newOrderBookDB()
std::unique_lock ml(m_mutex);
mPathLedger.reset();
return newPFWork("pf:newOBDB", ml);
return newPFWork("PthFindOBDB", ml);
}
/** A thread needs to be dispatched to handle pathfinding work of some kind.
@@ -1859,7 +1859,7 @@ LedgerMaster::fetchForHistory(
mFillInProgress = seq;
}
app_.getJobQueue().addJob(
jtADVANCE, "tryFill", [this, ledger]() {
jtADVANCE, "TryFill", [this, ledger]() {
tryFill(ledger);
});
}
@@ -1998,7 +1998,7 @@ LedgerMaster::doAdvance(std::unique_lock<std::recursive_mutex>& sl)
}
app_.getOPs().clearNeedNetworkLedger();
progress = newPFWork("pf:newLedger", sl);
progress = newPFWork("PthFindNewLed", sl);
}
if (progress)
mAdvanceWork = true;
@@ -2029,7 +2029,7 @@ LedgerMaster::gotFetchPack(bool progress, std::uint32_t seq)
{
if (!mGotFetchPackThread.test_and_set(std::memory_order_acquire))
{
app_.getJobQueue().addJob(jtLEDGER_DATA, "gotFetchPack", [&]() {
app_.getJobQueue().addJob(jtLEDGER_DATA, "GotFetchPack", [&]() {
app_.getInboundLedgers().gotFetchPack();
mGotFetchPackThread.clear(std::memory_order_release);
});

View File

@@ -96,7 +96,7 @@ LedgerReplayTask::LedgerReplayTask(
parameter.finishHash_,
LedgerReplayParameters::TASK_TIMEOUT,
{jtREPLAY_TASK,
"LedgerReplayTask",
"LedReplTask",
LedgerReplayParameters::MAX_QUEUED_TASKS},
app.journal("LedgerReplayTask"))
, inboundLedgers_(inboundLedgers)

View File

@@ -35,7 +35,7 @@ SkipListAcquire::SkipListAcquire(
ledgerHash,
LedgerReplayParameters::SUB_TASK_TIMEOUT,
{jtREPLAY_TASK,
"SkipListAcquire",
"SkipListAcq",
LedgerReplayParameters::MAX_QUEUED_TASKS},
app.journal("LedgerReplaySkipList"))
, inboundLedgers_(inboundLedgers)

View File

@@ -46,7 +46,7 @@ TransactionAcquire::TransactionAcquire(
app,
hash,
TX_ACQUIRE_TIMEOUT,
{jtTXN_DATA, "TransactionAcquire", {}},
{jtTXN_DATA, "TxAcq", {}},
app.journal("TransactionAcquire"))
, mHaveRoot(false)
, mPeerSet(std::move(peerSet))
@@ -79,7 +79,7 @@ TransactionAcquire::done()
// just updates the consensus and related structures when we acquire
// a transaction set. No need to update them if we're shutting down.
app_.getJobQueue().addJob(
jtTXN_DATA, "completeAcquire", [pap, hash, map]() {
jtTXN_DATA, "ComplAcquire", [pap, hash, map]() {
pap->getInboundTransactions().giveSet(hash, map, true);
});
}

View File

@@ -344,8 +344,7 @@ run(int argc, char** argv)
{
using namespace std;
beast::setCurrentThreadName(
"rippled: main " + BuildInfo::getVersionString());
beast::setCurrentThreadName("main");
po::variables_map vm;

View File

@@ -31,9 +31,8 @@ NodeStoreScheduler::scheduleTask(NodeStore::Task& task)
if (jobQueue_.isStopped())
return;
if (!jobQueue_.addJob(jtWRITE, "NodeObject::store", [&task]() {
task.performScheduledTask();
}))
if (!jobQueue_.addJob(
jtWRITE, "NObjStore", [&task]() { task.performScheduledTask(); }))
{
// Job not added, presumably because we're shutting down.
// Recover by executing the task synchronously.

View File

@@ -996,7 +996,7 @@ NetworkOPsImp::setHeartbeatTimer()
heartbeatTimer_,
mConsensus.parms().ledgerGRANULARITY,
[this]() {
m_job_queue.addJob(jtNETOP_TIMER, "NetOPs.heartbeat", [this]() {
m_job_queue.addJob(jtNETOP_TIMER, "NetHeart", [this]() {
processHeartbeatTimer();
});
},
@@ -1012,7 +1012,7 @@ NetworkOPsImp::setClusterTimer()
clusterTimer_,
10s,
[this]() {
m_job_queue.addJob(jtNETOP_CLUSTER, "NetOPs.cluster", [this]() {
m_job_queue.addJob(jtNETOP_CLUSTER, "NetCluster", [this]() {
processClusterTimer();
});
},
@@ -1240,7 +1240,7 @@ NetworkOPsImp::submitTransaction(std::shared_ptr<STTx const> const& iTrans)
auto tx = std::make_shared<Transaction>(trans, reason, app_);
m_job_queue.addJob(jtTRANSACTION, "submitTxn", [this, tx]() {
m_job_queue.addJob(jtTRANSACTION, "SubmitTxn", [this, tx]() {
auto t = tx;
processTransaction(t, false, false, FailHard::no);
});
@@ -1338,7 +1338,7 @@ NetworkOPsImp::doTransactionAsync(
if (mDispatchState == DispatchState::none)
{
if (m_job_queue.addJob(
jtBATCH, "transactionBatch", [this]() { transactionBatch(); }))
jtBATCH, "TxBatchAsync", [this]() { transactionBatch(); }))
{
mDispatchState = DispatchState::scheduled;
}
@@ -1385,7 +1385,7 @@ NetworkOPsImp::doTransactionSyncBatch(
if (mTransactions.size())
{
// More transactions need to be applied, but by another job.
if (m_job_queue.addJob(jtBATCH, "transactionBatch", [this]() {
if (m_job_queue.addJob(jtBATCH, "TxBatchSync", [this]() {
transactionBatch();
}))
{
@@ -3223,19 +3223,16 @@ NetworkOPsImp::reportFeeChange()
if (f != mLastFeeSummary)
{
m_job_queue.addJob(
jtCLIENT_FEE_CHANGE, "reportFeeChange->pubServer", [this]() {
pubServer();
});
jtCLIENT_FEE_CHANGE, "PubFee", [this]() { pubServer(); });
}
}
void
NetworkOPsImp::reportConsensusStateChange(ConsensusPhase phase)
{
m_job_queue.addJob(
jtCLIENT_CONSENSUS,
"reportConsensusStateChange->pubConsensus",
[this, phase]() { pubConsensus(phase); });
m_job_queue.addJob(jtCLIENT_CONSENSUS, "PubCons", [this, phase]() {
pubConsensus(phase);
});
}
inline void
@@ -3744,7 +3741,7 @@ NetworkOPsImp::addAccountHistoryJob(SubAccountHistoryInfoWeak subInfo)
app_.getJobQueue().addJob(
jtCLIENT_ACCT_HIST,
"AccountHistoryTxStream",
"HistTxStream",
[this, dbType = databaseType, subInfo]() {
auto const& accountId = subInfo.index_->accountId_;
auto& lastLedgerSeq = subInfo.index_->historyLastLedgerSeq_;

View File

@@ -61,7 +61,7 @@ Job::queue_time() const
void
Job::doJob()
{
beast::setCurrentThreadName("doJob: " + mName);
beast::setCurrentThreadName("j:" + mName);
m_loadEvent->start();
m_loadEvent->setName(mName);

View File

@@ -1079,7 +1079,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMManifests> const& m)
fee_.update(Resource::feeModerateBurdenPeer, "oversize");
app_.getJobQueue().addJob(
jtMANIFEST, "receiveManifests", [this, that = shared_from_this(), m]() {
jtMANIFEST, "RcvManifests", [this, that = shared_from_this(), m]() {
overlay_.onManifests(m, that);
});
}
@@ -1373,7 +1373,7 @@ PeerImp::handleTransaction(
{
app_.getJobQueue().addJob(
jtTRANSACTION,
"recvTransaction->checkTransaction",
"RcvCheckTx",
[weak = std::weak_ptr<PeerImp>(shared_from_this()),
flags,
checkSignature,
@@ -1476,7 +1476,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMGetLedger> const& m)
// Queue a job to process the request
std::weak_ptr<PeerImp> weak = shared_from_this();
app_.getJobQueue().addJob(jtLEDGER_REQ, "recvGetLedger", [weak, m]() {
app_.getJobQueue().addJob(jtLEDGER_REQ, "RcvGetLedger", [weak, m]() {
if (auto peer = weak.lock())
peer->processLedgerRequest(m);
});
@@ -1496,29 +1496,27 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMProofPathRequest> const& m)
fee_.update(
Resource::feeModerateBurdenPeer, "received a proof path request");
std::weak_ptr<PeerImp> weak = shared_from_this();
app_.getJobQueue().addJob(
jtREPLAY_REQ, "recvProofPathRequest", [weak, m]() {
if (auto peer = weak.lock())
app_.getJobQueue().addJob(jtREPLAY_REQ, "RcvProofPReq", [weak, m]() {
if (auto peer = weak.lock())
{
auto reply =
peer->ledgerReplayMsgHandler_.processProofPathRequest(m);
if (reply.has_error())
{
auto reply =
peer->ledgerReplayMsgHandler_.processProofPathRequest(m);
if (reply.has_error())
{
if (reply.error() == protocol::TMReplyError::reBAD_REQUEST)
peer->charge(
Resource::feeMalformedRequest,
"proof_path_request");
else
peer->charge(
Resource::feeRequestNoReply, "proof_path_request");
}
if (reply.error() == protocol::TMReplyError::reBAD_REQUEST)
peer->charge(
Resource::feeMalformedRequest, "proof_path_request");
else
{
peer->send(std::make_shared<Message>(
reply, protocol::mtPROOF_PATH_RESPONSE));
}
peer->charge(
Resource::feeRequestNoReply, "proof_path_request");
}
});
else
{
peer->send(std::make_shared<Message>(
reply, protocol::mtPROOF_PATH_RESPONSE));
}
}
});
}
void
@@ -1550,30 +1548,27 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMReplayDeltaRequest> const& m)
fee_.fee = Resource::feeModerateBurdenPeer;
std::weak_ptr<PeerImp> weak = shared_from_this();
app_.getJobQueue().addJob(
jtREPLAY_REQ, "recvReplayDeltaRequest", [weak, m]() {
if (auto peer = weak.lock())
app_.getJobQueue().addJob(jtREPLAY_REQ, "RcvReplDReq", [weak, m]() {
if (auto peer = weak.lock())
{
auto reply =
peer->ledgerReplayMsgHandler_.processReplayDeltaRequest(m);
if (reply.has_error())
{
auto reply =
peer->ledgerReplayMsgHandler_.processReplayDeltaRequest(m);
if (reply.has_error())
{
if (reply.error() == protocol::TMReplyError::reBAD_REQUEST)
peer->charge(
Resource::feeMalformedRequest,
"replay_delta_request");
else
peer->charge(
Resource::feeRequestNoReply,
"replay_delta_request");
}
if (reply.error() == protocol::TMReplyError::reBAD_REQUEST)
peer->charge(
Resource::feeMalformedRequest, "replay_delta_request");
else
{
peer->send(std::make_shared<Message>(
reply, protocol::mtREPLAY_DELTA_RESPONSE));
}
peer->charge(
Resource::feeRequestNoReply, "replay_delta_request");
}
});
else
{
peer->send(std::make_shared<Message>(
reply, protocol::mtREPLAY_DELTA_RESPONSE));
}
}
});
}
void
@@ -1669,7 +1664,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMLedgerData> const& m)
{
std::weak_ptr<PeerImp> weak{shared_from_this()};
app_.getJobQueue().addJob(
jtTXN_DATA, "recvPeerData", [weak, ledgerHash, m]() {
jtTXN_DATA, "RcvPeerData", [weak, ledgerHash, m]() {
if (auto peer = weak.lock())
{
peer->app_.getInboundTransactions().gotData(
@@ -1797,7 +1792,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMProposeSet> const& m)
std::weak_ptr<PeerImp> weak = shared_from_this();
app_.getJobQueue().addJob(
isTrusted ? jtPROPOSAL_t : jtPROPOSAL_ut,
"recvPropose->checkPropose",
"checkPropose",
[weak, isTrusted, m, proposal]() {
if (auto peer = weak.lock())
peer->checkPropose(isTrusted, m, proposal);
@@ -2423,18 +2418,7 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMValidation> const& m)
}
else if (isTrusted || !app_.getFeeTrack().isLoadedLocal())
{
std::string const name = [isTrusted, val]() {
std::string ret =
isTrusted ? "Trusted validation" : "Untrusted validation";
#ifdef DEBUG
ret += " " +
std::to_string(val->getFieldU32(sfLedgerSequence)) + ": " +
to_string(val->getNodeID());
#endif
return ret;
}();
std::string const name = isTrusted ? "ChkTrust" : "ChkUntrust";
std::weak_ptr<PeerImp> weak = shared_from_this();
app_.getJobQueue().addJob(
@@ -2494,11 +2478,10 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMGetObjectByHash> const& m)
}
std::weak_ptr<PeerImp> weak = shared_from_this();
app_.getJobQueue().addJob(
jtREQUESTED_TXN, "doTransactions", [weak, m]() {
if (auto peer = weak.lock())
peer->doTransactions(m);
});
app_.getJobQueue().addJob(jtREQUESTED_TXN, "DoTxs", [weak, m]() {
if (auto peer = weak.lock())
peer->doTransactions(m);
});
return;
}
@@ -2638,11 +2621,10 @@ PeerImp::onMessage(std::shared_ptr<protocol::TMHaveTransactions> const& m)
}
std::weak_ptr<PeerImp> weak = shared_from_this();
app_.getJobQueue().addJob(
jtMISSING_TXN, "handleHaveTransactions", [weak, m]() {
if (auto peer = weak.lock())
peer->handleHaveTransactions(m);
});
app_.getJobQueue().addJob(jtMISSING_TXN, "HandleHaveTxs", [weak, m]() {
if (auto peer = weak.lock())
peer->handleHaveTransactions(m);
});
}
void

View File

@@ -91,7 +91,7 @@ public:
JLOG(j_.info()) << "RPCCall::fromNetwork start";
mSending = m_jobQueue.addJob(
jtCLIENT_SUBSCRIBE, "RPCSub::sendThread", [this]() {
jtCLIENT_SUBSCRIBE, "RPCSubSendThr", [this]() {
sendThread();
});
}