mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-20 19:15:54 +00:00
Merge branch 'develop' into vault
This commit is contained in:
@@ -420,6 +420,7 @@
|
|||||||
# - r.ripple.com 51235
|
# - r.ripple.com 51235
|
||||||
# - sahyadri.isrdc.in 51235
|
# - sahyadri.isrdc.in 51235
|
||||||
# - hubs.xrpkuwait.com 51235
|
# - hubs.xrpkuwait.com 51235
|
||||||
|
# - hub.xrpl-commons.org 51235
|
||||||
#
|
#
|
||||||
# Examples:
|
# Examples:
|
||||||
#
|
#
|
||||||
|
|||||||
@@ -1555,10 +1555,10 @@ ApplicationImp::run()
|
|||||||
if (!config_->standalone())
|
if (!config_->standalone())
|
||||||
{
|
{
|
||||||
// VFALCO NOTE This seems unnecessary. If we properly refactor the load
|
// VFALCO NOTE This seems unnecessary. If we properly refactor the load
|
||||||
// manager then the deadlock detector can just always be
|
// manager then the stall detector can just always be
|
||||||
// "armed"
|
// "armed"
|
||||||
//
|
//
|
||||||
getLoadManager().activateDeadlockDetector();
|
getLoadManager().activateStallDetector();
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -32,7 +32,7 @@
|
|||||||
namespace ripple {
|
namespace ripple {
|
||||||
|
|
||||||
LoadManager::LoadManager(Application& app, beast::Journal journal)
|
LoadManager::LoadManager(Application& app, beast::Journal journal)
|
||||||
: app_(app), journal_(journal), deadLock_(), armed_(false)
|
: app_(app), journal_(journal), lastHeartbeat_(), armed_(false)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -53,19 +53,19 @@ LoadManager::~LoadManager()
|
|||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
void
|
void
|
||||||
LoadManager::activateDeadlockDetector()
|
LoadManager::activateStallDetector()
|
||||||
{
|
{
|
||||||
std::lock_guard sl(mutex_);
|
std::lock_guard sl(mutex_);
|
||||||
armed_ = true;
|
armed_ = true;
|
||||||
deadLock_ = std::chrono::steady_clock::now();
|
lastHeartbeat_ = std::chrono::steady_clock::now();
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
LoadManager::resetDeadlockDetector()
|
LoadManager::heartbeat()
|
||||||
{
|
{
|
||||||
auto const detector_start = std::chrono::steady_clock::now();
|
auto const heartbeat = std::chrono::steady_clock::now();
|
||||||
std::lock_guard sl(mutex_);
|
std::lock_guard sl(mutex_);
|
||||||
deadLock_ = detector_start;
|
lastHeartbeat_ = heartbeat;
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@@ -118,63 +118,62 @@ LoadManager::run()
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
// Copy out shared data under a lock. Use copies outside lock.
|
// Copy out shared data under a lock. Use copies outside lock.
|
||||||
auto const deadLock = deadLock_;
|
auto const lastHeartbeat = lastHeartbeat_;
|
||||||
auto const armed = armed_;
|
auto const armed = armed_;
|
||||||
sl.unlock();
|
sl.unlock();
|
||||||
|
|
||||||
// Measure the amount of time we have been deadlocked, in seconds.
|
// Measure the amount of time we have been stalled, in seconds.
|
||||||
using namespace std::chrono;
|
using namespace std::chrono;
|
||||||
auto const timeSpentDeadlocked =
|
auto const timeSpentStalled =
|
||||||
duration_cast<seconds>(steady_clock::now() - deadLock);
|
duration_cast<seconds>(steady_clock::now() - lastHeartbeat);
|
||||||
|
|
||||||
constexpr auto reportingIntervalSeconds = 10s;
|
constexpr auto reportingIntervalSeconds = 10s;
|
||||||
constexpr auto deadlockFatalLogMessageTimeLimit = 90s;
|
constexpr auto stallFatalLogMessageTimeLimit = 90s;
|
||||||
constexpr auto deadlockLogicErrorTimeLimit = 600s;
|
constexpr auto stallLogicErrorTimeLimit = 600s;
|
||||||
|
|
||||||
if (armed && (timeSpentDeadlocked >= reportingIntervalSeconds))
|
if (armed && (timeSpentStalled >= reportingIntervalSeconds))
|
||||||
{
|
{
|
||||||
// Report the deadlocked condition every
|
// Report the stalled condition every reportingIntervalSeconds
|
||||||
// reportingIntervalSeconds
|
if ((timeSpentStalled % reportingIntervalSeconds) == 0s)
|
||||||
if ((timeSpentDeadlocked % reportingIntervalSeconds) == 0s)
|
|
||||||
{
|
{
|
||||||
if (timeSpentDeadlocked < deadlockFatalLogMessageTimeLimit)
|
if (timeSpentStalled < stallFatalLogMessageTimeLimit)
|
||||||
{
|
{
|
||||||
JLOG(journal_.warn())
|
JLOG(journal_.warn())
|
||||||
<< "Server stalled for " << timeSpentDeadlocked.count()
|
<< "Server stalled for " << timeSpentStalled.count()
|
||||||
<< " seconds.";
|
<< " seconds.";
|
||||||
|
|
||||||
if (app_.getJobQueue().isOverloaded())
|
if (app_.getJobQueue().isOverloaded())
|
||||||
{
|
{
|
||||||
JLOG(journal_.warn()) << app_.getJobQueue().getJson(0);
|
JLOG(journal_.warn())
|
||||||
|
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
JLOG(journal_.fatal())
|
JLOG(journal_.fatal())
|
||||||
<< "Deadlock detected. Deadlocked time: "
|
<< "Server stalled for " << timeSpentStalled.count()
|
||||||
<< timeSpentDeadlocked.count() << "s";
|
<< " seconds.";
|
||||||
JLOG(journal_.fatal())
|
JLOG(journal_.fatal())
|
||||||
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we go over the deadlockTimeLimit spent deadlocked, it
|
// If we go over the stallLogicErrorTimeLimit spent stalled, it
|
||||||
// means that the deadlock resolution code has failed, which
|
// means that the stall resolution code has failed, which qualifies
|
||||||
// qualifies as undefined behavior.
|
// as a LogicError
|
||||||
//
|
if (timeSpentStalled >= stallLogicErrorTimeLimit)
|
||||||
if (timeSpentDeadlocked >= deadlockLogicErrorTimeLimit)
|
|
||||||
{
|
{
|
||||||
JLOG(journal_.fatal())
|
JLOG(journal_.fatal())
|
||||||
<< "LogicError: Deadlock detected. Deadlocked time: "
|
<< "LogicError: Fatal server stall detected. Stalled time: "
|
||||||
<< timeSpentDeadlocked.count() << "s";
|
<< timeSpentStalled.count() << "s";
|
||||||
JLOG(journal_.fatal())
|
JLOG(journal_.fatal())
|
||||||
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
<< "JobQueue: " << app_.getJobQueue().getJson(0);
|
||||||
LogicError("Deadlock detected");
|
LogicError("Fatal server stall detected");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool change;
|
bool change = false;
|
||||||
|
|
||||||
if (app_.getJobQueue().isOverloaded())
|
if (app_.getJobQueue().isOverloaded())
|
||||||
{
|
{
|
||||||
JLOG(journal_.info()) << "Raising local fee (JQ overload): "
|
JLOG(journal_.info()) << "Raising local fee (JQ overload): "
|
||||||
|
|||||||
@@ -58,28 +58,28 @@ public:
|
|||||||
*/
|
*/
|
||||||
~LoadManager();
|
~LoadManager();
|
||||||
|
|
||||||
/** Turn on deadlock detection.
|
/** Turn on stall detection.
|
||||||
|
|
||||||
The deadlock detector begins in a disabled state. After this function
|
The stall detector begins in a disabled state. After this function
|
||||||
is called, it will report deadlocks using a separate thread whenever
|
is called, it will report stalls using a separate thread whenever
|
||||||
the reset function is not called at least once per 10 seconds.
|
heartbeat() is not called at least once per 10 seconds.
|
||||||
|
|
||||||
@see resetDeadlockDetector
|
@see heartbeat
|
||||||
*/
|
*/
|
||||||
// VFALCO NOTE it seems that the deadlock detector has an "armed" state
|
// VFALCO NOTE it seems that the stall detector has an "armed" state
|
||||||
// to prevent it from going off during program startup if
|
// to prevent it from going off during program startup if
|
||||||
// there's a lengthy initialization operation taking place?
|
// there's a lengthy initialization operation taking place?
|
||||||
//
|
//
|
||||||
void
|
void
|
||||||
activateDeadlockDetector();
|
activateStallDetector();
|
||||||
|
|
||||||
/** Reset the deadlock detection timer.
|
/** Reset the stall detection timer.
|
||||||
|
|
||||||
A dedicated thread monitors the deadlock timer, and if too much
|
A dedicated thread monitors the stall timer, and if too much
|
||||||
time passes it will produce log warnings.
|
time passes it will produce log warnings.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
resetDeadlockDetector();
|
heartbeat();
|
||||||
|
|
||||||
//--------------------------------------------------------------------------
|
//--------------------------------------------------------------------------
|
||||||
|
|
||||||
@@ -98,12 +98,12 @@ private:
|
|||||||
beast::Journal const journal_;
|
beast::Journal const journal_;
|
||||||
|
|
||||||
std::thread thread_;
|
std::thread thread_;
|
||||||
std::mutex mutex_; // Guards deadLock_, armed_, cv_
|
std::mutex mutex_; // Guards lastHeartbeat_, armed_, cv_
|
||||||
std::condition_variable cv_;
|
std::condition_variable cv_;
|
||||||
bool stop_ = false;
|
bool stop_ = false;
|
||||||
|
|
||||||
std::chrono::steady_clock::time_point
|
// Detect server stalls
|
||||||
deadLock_; // Detect server deadlocks.
|
std::chrono::steady_clock::time_point lastHeartbeat_;
|
||||||
bool armed_;
|
bool armed_;
|
||||||
|
|
||||||
friend std::unique_ptr<LoadManager>
|
friend std::unique_ptr<LoadManager>
|
||||||
|
|||||||
@@ -1015,7 +1015,7 @@ NetworkOPsImp::processHeartbeatTimer()
|
|||||||
|
|
||||||
// VFALCO NOTE This is for diagnosing a crash on exit
|
// VFALCO NOTE This is for diagnosing a crash on exit
|
||||||
LoadManager& mgr(app_.getLoadManager());
|
LoadManager& mgr(app_.getLoadManager());
|
||||||
mgr.resetDeadlockDetector();
|
mgr.heartbeat();
|
||||||
|
|
||||||
std::size_t const numPeers = app_.overlay().size();
|
std::size_t const numPeers = app_.overlay().size();
|
||||||
|
|
||||||
|
|||||||
@@ -503,6 +503,9 @@ OverlayImpl::start()
|
|||||||
|
|
||||||
// Pool of servers operated by @Xrpkuwait - https://xrpkuwait.com
|
// Pool of servers operated by @Xrpkuwait - https://xrpkuwait.com
|
||||||
bootstrapIps.push_back("hubs.xrpkuwait.com 51235");
|
bootstrapIps.push_back("hubs.xrpkuwait.com 51235");
|
||||||
|
|
||||||
|
// Pool of servers operated by XRPL Commons - https://xrpl-commons.org
|
||||||
|
bootstrapIps.push_back("hub.xrpl-commons.org 51235");
|
||||||
}
|
}
|
||||||
|
|
||||||
m_resolver.resolve(
|
m_resolver.resolve(
|
||||||
|
|||||||
Reference in New Issue
Block a user