mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Detect server deadlocks and trigger the auto-restart mechanism.
This commit is contained in:
@@ -22,7 +22,7 @@ int upTime()
|
|||||||
|
|
||||||
LoadManager::LoadManager(int creditRate, int creditLimit, int debitWarn, int debitLimit) :
|
LoadManager::LoadManager(int creditRate, int creditLimit, int debitWarn, int debitLimit) :
|
||||||
mCreditRate(creditRate), mCreditLimit(creditLimit), mDebitWarn(debitWarn), mDebitLimit(debitLimit),
|
mCreditRate(creditRate), mCreditLimit(creditLimit), mDebitWarn(debitWarn), mDebitLimit(debitLimit),
|
||||||
mShutdown(false), mUptime(0), mCosts(LT_MAX)
|
mShutdown(false), mUptime(0), mDeadLock(0), mCosts(LT_MAX)
|
||||||
{
|
{
|
||||||
addLoadCost(LoadCost(LT_InvalidRequest, -10, LC_CPU | LC_Network));
|
addLoadCost(LoadCost(LT_InvalidRequest, -10, LC_CPU | LC_Network));
|
||||||
addLoadCost(LoadCost(LT_RequestNoReply, -1, LC_CPU | LC_Disk));
|
addLoadCost(LoadCost(LT_RequestNoReply, -1, LC_CPU | LC_Disk));
|
||||||
@@ -67,6 +67,11 @@ LoadManager::~LoadManager()
|
|||||||
while (1);
|
while (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LoadManager::noDeadLock()
|
||||||
|
{
|
||||||
|
boost::mutex::scoped_lock sl(mLock);
|
||||||
|
mDeadLock = mUptime;
|
||||||
|
}
|
||||||
|
|
||||||
int LoadManager::getCreditRate() const
|
int LoadManager::getCreditRate() const
|
||||||
{
|
{
|
||||||
@@ -321,6 +326,11 @@ int LoadManager::getUptime()
|
|||||||
return mUptime;
|
return mUptime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void LogDeadLock(int dlTime)
|
||||||
|
{
|
||||||
|
cLog(lsWARNING) << "Server stalled for " << dlTime << " seconds.";
|
||||||
|
}
|
||||||
|
|
||||||
void LoadManager::threadEntry()
|
void LoadManager::threadEntry()
|
||||||
{
|
{
|
||||||
NameThread("loadmgr");
|
NameThread("loadmgr");
|
||||||
@@ -335,6 +345,18 @@ void LoadManager::threadEntry()
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
++mUptime;
|
++mUptime;
|
||||||
|
|
||||||
|
int dlTime = mUptime - mDeadLock;
|
||||||
|
if (dlTime >= 10)
|
||||||
|
{
|
||||||
|
if ((dlTime % 10) == 0)
|
||||||
|
{
|
||||||
|
boost::thread(BIND_TYPE(&LogDeadLock, dlTime)).detach();
|
||||||
|
}
|
||||||
|
|
||||||
|
assert (dlTime < 180);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool change;
|
bool change;
|
||||||
|
|||||||
@@ -106,6 +106,8 @@ protected:
|
|||||||
int mUptime;
|
int mUptime;
|
||||||
int mSpace2[4];
|
int mSpace2[4];
|
||||||
|
|
||||||
|
int mDeadLock; // Detect server deadlocks
|
||||||
|
|
||||||
mutable boost::mutex mLock;
|
mutable boost::mutex mLock;
|
||||||
|
|
||||||
void canonicalize(LoadSource&, int upTime) const;
|
void canonicalize(LoadSource&, int upTime) const;
|
||||||
@@ -141,6 +143,7 @@ public:
|
|||||||
|
|
||||||
int getCost(LoadType t) { return mCosts[static_cast<int>(t)].mCost; }
|
int getCost(LoadType t) { return mCosts[static_cast<int>(t)].mCost; }
|
||||||
int getUptime();
|
int getUptime();
|
||||||
|
void noDeadLock();
|
||||||
};
|
};
|
||||||
|
|
||||||
class LoadFeeTrack
|
class LoadFeeTrack
|
||||||
|
|||||||
@@ -593,6 +593,8 @@ void NetworkOPs::checkState(const boost::system::error_code& result)
|
|||||||
{
|
{
|
||||||
ScopedLock sl(theApp->getMasterLock());
|
ScopedLock sl(theApp->getMasterLock());
|
||||||
|
|
||||||
|
theApp->getLoadManager().noDeadLock();
|
||||||
|
|
||||||
std::vector<Peer::pointer> peerList = theApp->getConnectionPool().getPeerVector();
|
std::vector<Peer::pointer> peerList = theApp->getConnectionPool().getPeerVector();
|
||||||
|
|
||||||
// do we have sufficient peers? If not, we are disconnected.
|
// do we have sufficient peers? If not, we are disconnected.
|
||||||
|
|||||||
Reference in New Issue
Block a user