Detect server deadlocks and trigger the auto-restart mechanism.

This commit is contained in:
JoelKatz
2013-05-08 15:21:22 -07:00
parent 4f6344bb13
commit d0d071c46b
3 changed files with 28 additions and 1 deletions

View File

@@ -22,7 +22,7 @@ int upTime()
LoadManager::LoadManager(int creditRate, int creditLimit, int debitWarn, int debitLimit) :
mCreditRate(creditRate), mCreditLimit(creditLimit), mDebitWarn(debitWarn), mDebitLimit(debitLimit),
mShutdown(false), mUptime(0), mCosts(LT_MAX)
mShutdown(false), mUptime(0), mDeadLock(0), mCosts(LT_MAX)
{
addLoadCost(LoadCost(LT_InvalidRequest, -10, LC_CPU | LC_Network));
addLoadCost(LoadCost(LT_RequestNoReply, -1, LC_CPU | LC_Disk));
@@ -67,6 +67,11 @@ LoadManager::~LoadManager()
while (1);
}
// Progress heartbeat for the stall detector.
// Copies the current uptime counter (mUptime) into mDeadLock while holding
// mLock. threadEntry() later computes mUptime - mDeadLock to see how long it
// has been since this function was last called.
void LoadManager::noDeadLock()
{
	boost::mutex::scoped_lock guard(mLock);	// released automatically on return
	mDeadLock = mUptime;
}
int LoadManager::getCreditRate() const
{
@@ -321,6 +326,11 @@ int LoadManager::getUptime()
return mUptime;
}
// File-local helper: writes a warning-level log line reporting a stall.
// dlTime is the stall length inserted into the message, which labels it as
// seconds.
static void LogDeadLock(int dlTime)
{
	cLog(lsWARNING)
		<< "Server stalled for "
		<< dlTime
		<< " seconds.";
}
void LoadManager::threadEntry()
{
NameThread("loadmgr");
@@ -335,6 +345,18 @@ void LoadManager::threadEntry()
return;
}
++mUptime;
int dlTime = mUptime - mDeadLock;
if (dlTime >= 10)
{
if ((dlTime % 10) == 0)
{
boost::thread(BIND_TYPE(&LogDeadLock, dlTime)).detach();
}
assert (dlTime < 180);
}
}
bool change;

View File

@@ -106,6 +106,8 @@ protected:
int mUptime;
int mSpace2[4];
int mDeadLock; // Detect server deadlocks
mutable boost::mutex mLock;
void canonicalize(LoadSource&, int upTime) const;
@@ -141,6 +143,7 @@ public:
int getCost(LoadType t) { return mCosts[static_cast<int>(t)].mCost; }
int getUptime();
void noDeadLock();
};
class LoadFeeTrack

View File

@@ -595,6 +595,8 @@ void NetworkOPs::checkState(const boost::system::error_code& result)
{
ScopedLock sl(theApp->getMasterLock());
theApp->getLoadManager().noDeadLock();
std::vector<Peer::pointer> peerList = theApp->getConnectionPool().getPeerVector();
// do we have sufficient peers? If not, we are disconnected.