mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-20 11:05:54 +00:00
Increase the deadlock detection timeout:
It's possible an overloaded job queue is causing false alarms on the deadlock detector. Log a fatal message after 90s, declare a logic error after 600s.
This commit is contained in:
@@ -126,28 +126,51 @@ void LoadManager::run ()
|
|||||||
auto const timeSpentDeadlocked =
|
auto const timeSpentDeadlocked =
|
||||||
duration_cast<seconds>(steady_clock::now() - deadLock);
|
duration_cast<seconds>(steady_clock::now() - deadLock);
|
||||||
|
|
||||||
auto const reportingIntervalSeconds = 10s;
|
constexpr auto reportingIntervalSeconds = 10s;
|
||||||
|
constexpr auto deadlockFatalLogMessageTimeLimit = 90s;
|
||||||
|
constexpr auto deadlockLogicErrorTimeLimit = 600s;
|
||||||
if (armed && (timeSpentDeadlocked >= reportingIntervalSeconds))
|
if (armed && (timeSpentDeadlocked >= reportingIntervalSeconds))
|
||||||
{
|
{
|
||||||
// Report the deadlocked condition every 10 seconds
|
|
||||||
|
// Report the deadlocked condition every reportingIntervalSeconds
|
||||||
if ((timeSpentDeadlocked % reportingIntervalSeconds) == 0s)
|
if ((timeSpentDeadlocked % reportingIntervalSeconds) == 0s)
|
||||||
|
{
|
||||||
|
if (timeSpentDeadlocked < deadlockFatalLogMessageTimeLimit)
|
||||||
{
|
{
|
||||||
JLOG(journal_.warn())
|
JLOG(journal_.warn())
|
||||||
<< "Server stalled for "
|
<< "Server stalled for "
|
||||||
<< timeSpentDeadlocked.count() << " seconds.";
|
<< timeSpentDeadlocked.count() << " seconds.";
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
JLOG(journal_.fatal())
|
||||||
|
<< "Deadlock detected. Deadlocked time: "
|
||||||
|
<< timeSpentDeadlocked.count() << "s";
|
||||||
|
if (app_.getJobQueue().isOverloaded())
|
||||||
|
{
|
||||||
|
JLOG(journal_.fatal())
|
||||||
|
<< app_.getJobQueue().getJson(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If we go over 90 seconds spent deadlocked, it means that
|
// If we go over the deadlockLogicErrorTimeLimit spent deadlocked, it means that
|
||||||
// the deadlock resolution code has failed, which qualifies
|
// the deadlock resolution code has failed, which qualifies
|
||||||
// as undefined behavior.
|
// as undefined behavior.
|
||||||
//
|
//
|
||||||
constexpr auto deadlockTimeLimit = 90s;
|
if (timeSpentDeadlocked >= deadlockLogicErrorTimeLimit)
|
||||||
assert (timeSpentDeadlocked < deadlockTimeLimit);
|
{
|
||||||
|
JLOG(journal_.fatal())
|
||||||
if (timeSpentDeadlocked >= deadlockTimeLimit)
|
<< "LogicError: Deadlock detected. Deadlocked time: "
|
||||||
|
<< timeSpentDeadlocked.count() << "s";
|
||||||
|
if (app_.getJobQueue().isOverloaded())
|
||||||
|
{
|
||||||
|
JLOG(journal_.fatal()) << app_.getJobQueue().getJson(0);
|
||||||
|
}
|
||||||
LogicError("Deadlock detected");
|
LogicError("Deadlock detected");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool change = false;
|
bool change = false;
|
||||||
if (app_.getJobQueue ().isOverloaded ())
|
if (app_.getJobQueue ().isOverloaded ())
|
||||||
|
|||||||
Reference in New Issue
Block a user