From 04b0f968fd9194a04a5ee264b737056a273c3311 Mon Sep 17 00:00:00 2001 From: JoelKatz Date: Wed, 8 May 2013 15:21:22 -0700 Subject: [PATCH] Detect server deadlocks and trigger the auto-restart mechanism. --- src/cpp/ripple/LoadManager.cpp | 24 +++++++++++++++++++++++- src/cpp/ripple/LoadManager.h | 3 +++ src/cpp/ripple/NetworkOPs.cpp | 2 ++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/cpp/ripple/LoadManager.cpp b/src/cpp/ripple/LoadManager.cpp index d83d87822..46965ff2b 100644 --- a/src/cpp/ripple/LoadManager.cpp +++ b/src/cpp/ripple/LoadManager.cpp @@ -22,7 +22,7 @@ int upTime() LoadManager::LoadManager(int creditRate, int creditLimit, int debitWarn, int debitLimit) : mCreditRate(creditRate), mCreditLimit(creditLimit), mDebitWarn(debitWarn), mDebitLimit(debitLimit), - mShutdown(false), mUptime(0), mCosts(LT_MAX) + mShutdown(false), mUptime(0), mDeadLock(0), mCosts(LT_MAX) { addLoadCost(LoadCost(LT_InvalidRequest, -10, LC_CPU | LC_Network)); addLoadCost(LoadCost(LT_RequestNoReply, -1, LC_CPU | LC_Disk)); @@ -67,6 +67,11 @@ LoadManager::~LoadManager() while (1); } +void LoadManager::noDeadLock() +{ + boost::mutex::scoped_lock sl(mLock); + mDeadLock = mUptime; +} int LoadManager::getCreditRate() const { @@ -321,6 +326,11 @@ int LoadManager::getUptime() return mUptime; } +static void LogDeadLock(int dlTime) +{ + cLog(lsWARNING) << "Server stalled for " << dlTime << " seconds."; +} + void LoadManager::threadEntry() { NameThread("loadmgr"); @@ -335,6 +345,18 @@ void LoadManager::threadEntry() return; } ++mUptime; + + int dlTime = mUptime - mDeadLock; + if (dlTime >= 10) + { + if ((dlTime % 10) == 0) + { + boost::thread(BIND_TYPE(&LogDeadLock, dlTime)).detach(); + } + + assert (dlTime < 180); + } + } bool change; diff --git a/src/cpp/ripple/LoadManager.h b/src/cpp/ripple/LoadManager.h index 392620241..07722b9e6 100644 --- a/src/cpp/ripple/LoadManager.h +++ b/src/cpp/ripple/LoadManager.h @@ -106,6 +106,8 @@ protected: int mUptime; int mSpace2[4]; + 
int mDeadLock; // Detect server deadlocks + mutable boost::mutex mLock; void canonicalize(LoadSource&, int upTime) const; @@ -141,6 +143,7 @@ public: int getCost(LoadType t) { return mCosts[static_cast<int>(t)].mCost; } int getUptime(); + void noDeadLock(); }; class LoadFeeTrack diff --git a/src/cpp/ripple/NetworkOPs.cpp b/src/cpp/ripple/NetworkOPs.cpp index 795748382..291ed70dd 100644 --- a/src/cpp/ripple/NetworkOPs.cpp +++ b/src/cpp/ripple/NetworkOPs.cpp @@ -593,6 +593,8 @@ void NetworkOPs::checkState(const boost::system::error_code& result) { ScopedLock sl(theApp->getMasterLock()); + theApp->getLoadManager().noDeadLock(); + std::vector<Peer::pointer> peerList = theApp->getConnectionPool().getPeerVector(); // do we have sufficient peers? If not, we are disconnected.