mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-19 18:45:52 +00:00
Merge branch 'develop' into vault
This commit is contained in:
@@ -420,6 +420,7 @@
|
||||
# - r.ripple.com 51235
|
||||
# - sahyadri.isrdc.in 51235
|
||||
# - hubs.xrpkuwait.com 51235
|
||||
# - hub.xrpl-commons.org 51235
|
||||
#
|
||||
# Examples:
|
||||
#
|
||||
|
||||
@@ -1555,10 +1555,10 @@ ApplicationImp::run()
|
||||
if (!config_->standalone())
|
||||
{
|
||||
// VFALCO NOTE This seems unnecessary. If we properly refactor the load
|
||||
// manager then the deadlock detector can just always be
|
||||
// manager then the stall detector can just always be
|
||||
// "armed"
|
||||
//
|
||||
getLoadManager().activateDeadlockDetector();
|
||||
getLoadManager().activateStallDetector();
|
||||
}
|
||||
|
||||
{
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
namespace ripple {
|
||||
|
||||
LoadManager::LoadManager(Application& app, beast::Journal journal)
|
||||
: app_(app), journal_(journal), deadLock_(), armed_(false)
|
||||
: app_(app), journal_(journal), lastHeartbeat_(), armed_(false)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -53,19 +53,19 @@ LoadManager::~LoadManager()
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
void
|
||||
LoadManager::activateDeadlockDetector()
|
||||
LoadManager::activateStallDetector()
|
||||
{
|
||||
std::lock_guard sl(mutex_);
|
||||
armed_ = true;
|
||||
deadLock_ = std::chrono::steady_clock::now();
|
||||
lastHeartbeat_ = std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
void
|
||||
LoadManager::resetDeadlockDetector()
|
||||
LoadManager::heartbeat()
|
||||
{
|
||||
auto const detector_start = std::chrono::steady_clock::now();
|
||||
auto const heartbeat = std::chrono::steady_clock::now();
|
||||
std::lock_guard sl(mutex_);
|
||||
deadLock_ = detector_start;
|
||||
lastHeartbeat_ = heartbeat;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
@@ -118,63 +118,62 @@ LoadManager::run()
|
||||
break;
|
||||
|
||||
// Copy out shared data under a lock. Use copies outside lock.
|
||||
auto const deadLock = deadLock_;
|
||||
auto const lastHeartbeat = lastHeartbeat_;
|
||||
auto const armed = armed_;
|
||||
sl.unlock();
|
||||
|
||||
// Measure the amount of time we have been deadlocked, in seconds.
|
||||
// Measure the amount of time we have been stalled, in seconds.
|
||||
using namespace std::chrono;
|
||||
auto const timeSpentDeadlocked =
|
||||
duration_cast<seconds>(steady_clock::now() - deadLock);
|
||||
auto const timeSpentStalled =
|
||||
duration_cast<seconds>(steady_clock::now() - lastHeartbeat);
|
||||
|
||||
constexpr auto reportingIntervalSeconds = 10s;
|
||||
constexpr auto deadlockFatalLogMessageTimeLimit = 90s;
|
||||
constexpr auto deadlockLogicErrorTimeLimit = 600s;
|
||||
constexpr auto stallFatalLogMessageTimeLimit = 90s;
|
||||
constexpr auto stallLogicErrorTimeLimit = 600s;
|
||||
|
||||
if (armed && (timeSpentDeadlocked >= reportingIntervalSeconds))
|
||||
if (armed && (timeSpentStalled >= reportingIntervalSeconds))
|
||||
{
|
||||
// Report the deadlocked condition every
|
||||
// reportingIntervalSeconds
|
||||
if ((timeSpentDeadlocked % reportingIntervalSeconds) == 0s)
|
||||
// Report the stalled condition every reportingIntervalSeconds
|
||||
if ((timeSpentStalled % reportingIntervalSeconds) == 0s)
|
||||
{
|
||||
if (timeSpentDeadlocked < deadlockFatalLogMessageTimeLimit)
|
||||
if (timeSpentStalled < stallFatalLogMessageTimeLimit)
|
||||
{
|
||||
JLOG(journal_.warn())
|
||||
<< "Server stalled for " << timeSpentDeadlocked.count()
|
||||
<< "Server stalled for " << timeSpentStalled.count()
|
||||
<< " seconds.";
|
||||
|
||||
if (app_.getJobQueue().isOverloaded())
|
||||
{
|
||||
JLOG(journal_.warn()) << app_.getJobQueue().getJson(0);
|
||||
JLOG(journal_.warn())
|
||||
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
JLOG(journal_.fatal())
|
||||
<< "Deadlock detected. Deadlocked time: "
|
||||
<< timeSpentDeadlocked.count() << "s";
|
||||
<< "Server stalled for " << timeSpentStalled.count()
|
||||
<< " seconds.";
|
||||
JLOG(journal_.fatal())
|
||||
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
||||
}
|
||||
}
|
||||
|
||||
// If we go over the deadlockTimeLimit spent deadlocked, it
|
||||
// means that the deadlock resolution code has failed, which
|
||||
// qualifies as undefined behavior.
|
||||
//
|
||||
if (timeSpentDeadlocked >= deadlockLogicErrorTimeLimit)
|
||||
// If we go over the stallLogicErrorTimeLimit spent stalled, it
|
||||
// means that the stall resolution code has failed, which qualifies
|
||||
// as a LogicError
|
||||
if (timeSpentStalled >= stallLogicErrorTimeLimit)
|
||||
{
|
||||
JLOG(journal_.fatal())
|
||||
<< "LogicError: Deadlock detected. Deadlocked time: "
|
||||
<< timeSpentDeadlocked.count() << "s";
|
||||
<< "LogicError: Fatal server stall detected. Stalled time: "
|
||||
<< timeSpentStalled.count() << "s";
|
||||
JLOG(journal_.fatal())
|
||||
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
||||
LogicError("Deadlock detected");
|
||||
LogicError("Fatal server stall detected");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool change;
|
||||
|
||||
bool change = false;
|
||||
if (app_.getJobQueue().isOverloaded())
|
||||
{
|
||||
JLOG(journal_.info()) << "Raising local fee (JQ overload): "
|
||||
|
||||
@@ -58,28 +58,28 @@ public:
|
||||
*/
|
||||
~LoadManager();
|
||||
|
||||
/** Turn on deadlock detection.
|
||||
/** Turn on stall detection.
|
||||
|
||||
The deadlock detector begins in a disabled state. After this function
|
||||
is called, it will report deadlocks using a separate thread whenever
|
||||
The stall detector begins in a disabled state. After this function
|
||||
is called, it will report stalls using a separate thread whenever
|
||||
the reset function is not called at least once per 10 seconds.
|
||||
|
||||
@see resetDeadlockDetector
|
||||
@see heartbeat
|
||||
*/
|
||||
// VFALCO NOTE it seems that the deadlock detector has an "armed" state
|
||||
// VFALCO NOTE it seems that the stall detector has an "armed" state
|
||||
// to prevent it from going off during program startup if
|
||||
// there's a lengthy initialization operation taking place?
|
||||
//
|
||||
void
|
||||
activateDeadlockDetector();
|
||||
activateStallDetector();
|
||||
|
||||
/** Reset the deadlock detection timer.
|
||||
/** Reset the stall detection timer.
|
||||
|
||||
A dedicated thread monitors the deadlock timer, and if too much
|
||||
A dedicated thread monitors the stall timer, and if too much
|
||||
time passes it will produce log warnings.
|
||||
*/
|
||||
void
|
||||
resetDeadlockDetector();
|
||||
heartbeat();
|
||||
|
||||
//--------------------------------------------------------------------------
|
||||
|
||||
@@ -98,12 +98,12 @@ private:
|
||||
beast::Journal const journal_;
|
||||
|
||||
std::thread thread_;
|
||||
std::mutex mutex_; // Guards deadLock_, armed_, cv_
|
||||
std::mutex mutex_; // Guards lastHeartbeat_, armed_, cv_
|
||||
std::condition_variable cv_;
|
||||
bool stop_ = false;
|
||||
|
||||
std::chrono::steady_clock::time_point
|
||||
deadLock_; // Detect server deadlocks.
|
||||
// Detect server stalls
|
||||
std::chrono::steady_clock::time_point lastHeartbeat_;
|
||||
bool armed_;
|
||||
|
||||
friend std::unique_ptr<LoadManager>
|
||||
|
||||
@@ -1015,7 +1015,7 @@ NetworkOPsImp::processHeartbeatTimer()
|
||||
|
||||
// VFALCO NOTE This is for diagnosing a crash on exit
|
||||
LoadManager& mgr(app_.getLoadManager());
|
||||
mgr.resetDeadlockDetector();
|
||||
mgr.heartbeat();
|
||||
|
||||
std::size_t const numPeers = app_.overlay().size();
|
||||
|
||||
|
||||
@@ -503,6 +503,9 @@ OverlayImpl::start()
|
||||
|
||||
// Pool of servers operated by @Xrpkuwait - https://xrpkuwait.com
|
||||
bootstrapIps.push_back("hubs.xrpkuwait.com 51235");
|
||||
|
||||
// Pool of servers operated by XRPL Commons - https://xrpl-commons.org
|
||||
bootstrapIps.push_back("hub.xrpl-commons.org 51235");
|
||||
}
|
||||
|
||||
m_resolver.resolve(
|
||||
|
||||
Reference in New Issue
Block a user