State accounting:

Record total duration spent in and every transition to each
possible server state. Display as counters through server_info
RPC call.
This commit is contained in:
Mark Travis
2015-10-09 12:31:54 -07:00
committed by Nik Bougalis
parent b71ff4cb88
commit fdd012c420
2 changed files with 135 additions and 10 deletions

View File

@@ -84,6 +84,7 @@ class NetworkOPsImp final
/**
* Transaction with input flags and results to be applied in batches.
*/
class TransactionStatus
{
public:
@@ -116,6 +117,61 @@ class NetworkOPsImp final
running,
};
static std::array<char const*, 5> const states_;
/**
* State accounting records two attributes for each possible server state:
* 1) Amount of time spent in each state (in microseconds). This value is
* updated upon each state transition.
* 2) Number of transitions to each state.
*
* This data can be polled through server_info and represented by
* monitoring systems similarly to how bandwidth, CPU, and other
* counter-based metrics are managed.
*
* State accounting is more accurate than periodic sampling of server
* state. With periodic sampling, it is very likely that state transitions
* are missed, and accuracy of time spent in each state is very rough.
*/
class StateAccounting
{
struct Counters
{
std::uint32_t transitions = 0;
std::chrono::microseconds dur = std::chrono::microseconds (0);
};
OperatingMode mode_ = omDISCONNECTED;
std::array<Counters, 5> counters_;
mutable std::mutex mutex_;
std::chrono::system_clock::time_point start_ =
std::chrono::system_clock::now();
static std::array<Json::StaticString const, 5> const states_;
static Json::StaticString const transitions_;
static Json::StaticString const dur_;
public:
explicit StateAccounting ()
{
counters_[omDISCONNECTED].transitions = 1;
}
/**
* Record state transition. Update duration spent in previous
* state.
*
* @param om New state.
*/
void mode (OperatingMode om);
/**
* Output state counters in JSON format.
*
* @return JSON object.
*/
Json::Value json() const;
};
public:
// VFALCO TODO Make LedgerMaster a SharedPtr or a reference.
//
@@ -141,6 +197,7 @@ public:
, m_job_queue (job_queue)
, m_standalone (standalone)
, m_network_quorum (start_valid ? 0 : network_quorum)
, accounting_ ()
{
}
@@ -486,8 +543,35 @@ private:
std::mutex mMutex;
DispatchState mDispatchState = DispatchState::none;
std::vector <TransactionStatus> mTransactions;
StateAccounting accounting_;
};
//------------------------------------------------------------------------------
static std::array<char const*, 5> const stateNames {{
"disconnected",
"connected",
"syncing",
"tracking",
"full"}};
static_assert (NetworkOPs::omDISCONNECTED == 0, "");
static_assert (NetworkOPs::omCONNECTED == 1, "");
static_assert (NetworkOPs::omSYNCING == 2, "");
static_assert (NetworkOPs::omTRACKING == 3, "");
static_assert (NetworkOPs::omFULL == 4, "");
std::array<char const*, 5> const NetworkOPsImp::states_ = stateNames;
std::array<Json::StaticString const, 5> const
NetworkOPsImp::StateAccounting::states_ = {{
Json::StaticString(stateNames[0]),
Json::StaticString(stateNames[1]),
Json::StaticString(stateNames[2]),
Json::StaticString(stateNames[3]),
Json::StaticString(stateNames[4])}};
//------------------------------------------------------------------------------
std::string
NetworkOPsImp::getHostId (bool forAdmin)
@@ -626,15 +710,6 @@ void NetworkOPsImp::processClusterTimer ()
std::string NetworkOPsImp::strOperatingMode () const
{
static char const* paStatusToken [] =
{
"disconnected",
"connected",
"syncing",
"tracking",
"full"
};
if (mMode == omFULL)
{
if (mConsensus->isProposing ())
@@ -644,7 +719,7 @@ std::string NetworkOPsImp::strOperatingMode () const
return "validating";
}
return paStatusToken[mMode];
return states_[mMode];
}
void NetworkOPsImp::submitTransaction (std::shared_ptr<STTx const> const& iTrans)
@@ -1594,6 +1669,8 @@ void NetworkOPsImp::setMode (OperatingMode om)
mMode = om;
accounting_.mode (om);
m_journal.info << "STATE->" << strOperatingMode ();
pubServer ();
}
@@ -2044,6 +2121,8 @@ Json::Value NetworkOPsImp::getServerInfo (bool human, bool admin)
info[jss::published_ledger] = lpPublished->info().seq;
}
info[jss::state_accounting] = accounting_.json();
return info;
}
@@ -2994,6 +3073,49 @@ NetworkOPs::~NetworkOPs ()
//------------------------------------------------------------------------------
void NetworkOPsImp::StateAccounting::mode (OperatingMode om)
{
auto now = std::chrono::system_clock::now();
std::lock_guard<std::mutex> lock (mutex_);
++counters_[om].transitions;
counters_[mode_].dur += std::chrono::duration_cast<
std::chrono::microseconds>(now - start_);
mode_ = om;
start_ = now;
}
Json::Value NetworkOPsImp::StateAccounting::json() const
{
std::unique_lock<std::mutex> lock (mutex_);
auto counters = counters_;
auto const start = start_;
auto const mode = mode_;
lock.unlock();
counters[mode].dur += std::chrono::duration_cast<
std::chrono::microseconds>(std::chrono::system_clock::now() - start);
Json::Value ret = Json::objectValue;
for (std::underlying_type_t<OperatingMode> i = omDISCONNECTED;
i <= omFULL; ++i)
{
ret[states_[i]] = Json::objectValue;
auto& state = ret[states_[i]];
state[jss::transitions] = counters[i].transitions;
state[jss::duration_us] = std::to_string (counters[i].dur.count());
}
return ret;
}
//------------------------------------------------------------------------------
std::unique_ptr<NetworkOPs>
make_NetworkOPs (Application& app, NetworkOPs::clock_type& clock, bool standalone,
std::size_t network_quorum, bool startvalid,

View File

@@ -139,6 +139,7 @@ JSS ( dir_index ); // out: DirectoryEntryIterator
JSS ( dir_root ); // out: DirectoryEntryIterator
JSS ( directory ); // in: LedgerEntry
JSS ( drops ); // out: TxQ
JSS ( duration_us ); // out: NetworkOPs
JSS ( enabled ); // out: AmendmentTable
JSS ( engine_result ); // out: NetworkOPs, TransactionSign, Submit
JSS ( engine_result_code ); // out: NetworkOPs, TransactionSign, Submit
@@ -339,6 +340,7 @@ JSS ( source_currencies ); // in: PathRequest, RipplePathFind
JSS ( stand_alone ); // out: NetworkOPs
JSS ( start ); // in: TxHistory
JSS ( state ); // out: Logic.h, ServerState, LedgerData
JSS ( state_accounting ); // out: NetworkOPs
JSS ( state_now ); // in: Subscribe
JSS ( status ); // error
JSS ( stop ); // in: LedgerCleaner
@@ -365,6 +367,7 @@ JSS ( transaction ); // in: Tx
JSS ( transaction_hash ); // out: LedgerProposal, LedgerToJson
JSS ( transactions ); // out: LedgerToJson,
// in: AccountTx*, Unsubscribe
JSS ( transitions ); // out: NetworkOPs
JSS ( treenode_cache_size ); // out: GetCounts
JSS ( treenode_track_size ); // out: GetCounts
JSS ( tx ); // out: STTx, AccountTx*