From fdd012c4206b4ad6242fc3dc6ede248e193f36b6 Mon Sep 17 00:00:00 2001 From: Mark Travis Date: Fri, 9 Oct 2015 12:31:54 -0700 Subject: [PATCH] State accounting: Record total duration spent in and every transition to each possible server state. Display as counters through server_info RPC call. --- src/ripple/app/misc/NetworkOPs.cpp | 142 +++++++++++++++++++++++++++-- src/ripple/protocol/JsonFields.h | 3 + 2 files changed, 135 insertions(+), 10 deletions(-) diff --git a/src/ripple/app/misc/NetworkOPs.cpp b/src/ripple/app/misc/NetworkOPs.cpp index 72e68a74b6..7c688e6af5 100644 --- a/src/ripple/app/misc/NetworkOPs.cpp +++ b/src/ripple/app/misc/NetworkOPs.cpp @@ -84,6 +84,7 @@ class NetworkOPsImp final /** * Transaction with input flags and results to be applied in batches. */ + class TransactionStatus { public: @@ -116,6 +117,61 @@ class NetworkOPsImp final running, }; + static std::array const states_; + + /** + * State accounting records two attributes for each possible server state: + * 1) Amount of time spent in each state (in microseconds). This value is + * updated upon each state transition. + * 2) Number of transitions to each state. + * + * This data can be polled through server_info and represented by + * monitoring systems similarly to how bandwidth, CPU, and other + * counter-based metrics are managed. + * + * State accounting is more accurate than periodic sampling of server + * state. With periodic sampling, it is very likely that state transitions + * are missed, and accuracy of time spent in each state is very rough. + */ + class StateAccounting + { + struct Counters + { + std::uint32_t transitions = 0; + std::chrono::microseconds dur = std::chrono::microseconds (0); + }; + + OperatingMode mode_ = omDISCONNECTED; + std::array counters_; + mutable std::mutex mutex_; + std::chrono::system_clock::time_point start_ = + std::chrono::system_clock::now(); + static std::array const states_; + static Json::StaticString const transitions_; + static Json::StaticString const dur_; + + public: + explicit StateAccounting () + { + counters_[omDISCONNECTED].transitions = 1; + } + + /** + * Record state transition. Update duration spent in previous + * state. + * + * @param om New state. + */ + void mode (OperatingMode om); + + /** + * Output state counters in JSON format. + * + * @return JSON object. + */ + Json::Value json() const; + }; + public: // VFALCO TODO Make LedgerMaster a SharedPtr or a reference. // @@ -141,6 +197,7 @@ public: , m_job_queue (job_queue) , m_standalone (standalone) , m_network_quorum (start_valid ? 0 : network_quorum) + , accounting_ () { } @@ -486,8 +543,35 @@ private: std::mutex mMutex; DispatchState mDispatchState = DispatchState::none; std::vector mTransactions; + + StateAccounting accounting_; }; +//------------------------------------------------------------------------------ + +static std::array const stateNames {{ + "disconnected", + "connected", + "syncing", + "tracking", + "full"}}; + +static_assert (NetworkOPs::omDISCONNECTED == 0, ""); +static_assert (NetworkOPs::omCONNECTED == 1, ""); +static_assert (NetworkOPs::omSYNCING == 2, ""); +static_assert (NetworkOPs::omTRACKING == 3, ""); +static_assert (NetworkOPs::omFULL == 4, ""); + +std::array const NetworkOPsImp::states_ = stateNames; + +std::array const +NetworkOPsImp::StateAccounting::states_ = {{ + Json::StaticString(stateNames[0]), + Json::StaticString(stateNames[1]), + Json::StaticString(stateNames[2]), + Json::StaticString(stateNames[3]), + Json::StaticString(stateNames[4])}}; + //------------------------------------------------------------------------------ std::string NetworkOPsImp::getHostId (bool forAdmin) @@ -626,15 +710,6 @@ void NetworkOPsImp::processClusterTimer () std::string NetworkOPsImp::strOperatingMode () const { - static char const* paStatusToken [] = - { - "disconnected", - "connected", - "syncing", - "tracking", - "full" - }; - if (mMode == omFULL) { if (mConsensus->isProposing ()) @@ -644,7 +719,7 @@ std::string NetworkOPsImp::strOperatingMode () const return "validating"; } - return paStatusToken[mMode]; + return states_[mMode]; } void NetworkOPsImp::submitTransaction (std::shared_ptr const& iTrans) @@ -1594,6 +1669,8 @@ void NetworkOPsImp::setMode (OperatingMode om) mMode = om; + accounting_.mode (om); + m_journal.info << "STATE->" << strOperatingMode (); pubServer (); } @@ -2044,6 +2121,8 @@ Json::Value NetworkOPsImp::getServerInfo (bool human, bool admin) info[jss::published_ledger] = lpPublished->info().seq; } + info[jss::state_accounting] = accounting_.json(); + return info; } @@ -2994,6 +3073,49 @@ NetworkOPs::~NetworkOPs () //------------------------------------------------------------------------------ + +void NetworkOPsImp::StateAccounting::mode (OperatingMode om) +{ + auto now = std::chrono::system_clock::now(); + + std::lock_guard lock (mutex_); + ++counters_[om].transitions; + counters_[mode_].dur += std::chrono::duration_cast< + std::chrono::microseconds>(now - start_); + + mode_ = om; + start_ = now; +} + +Json::Value NetworkOPsImp::StateAccounting::json() const +{ + std::unique_lock lock (mutex_); + + auto counters = counters_; + auto const start = start_; + auto const mode = mode_; + + lock.unlock(); + + counters[mode].dur += std::chrono::duration_cast< + std::chrono::microseconds>(std::chrono::system_clock::now() - start); + + Json::Value ret = Json::objectValue; + + for (std::underlying_type_t i = omDISCONNECTED; + i <= omFULL; ++i) + { + ret[states_[i]] = Json::objectValue; + auto& state = ret[states_[i]]; + state[jss::transitions] = counters[i].transitions; + state[jss::duration_us] = std::to_string (counters[i].dur.count()); + } + + return ret; +} + +//------------------------------------------------------------------------------ + std::unique_ptr make_NetworkOPs (Application& app, NetworkOPs::clock_type& clock, bool standalone, std::size_t network_quorum, bool startvalid, diff --git a/src/ripple/protocol/JsonFields.h b/src/ripple/protocol/JsonFields.h index 5a86de19e0..53c580e146 100644 --- a/src/ripple/protocol/JsonFields.h +++ b/src/ripple/protocol/JsonFields.h @@ -139,6 +139,7 @@ JSS ( dir_index ); // out: DirectoryEntryIterator JSS ( dir_root ); // out: DirectoryEntryIterator JSS ( directory ); // in: LedgerEntry JSS ( drops ); // out: TxQ +JSS ( duration_us ); // out: NetworkOPs JSS ( enabled ); // out: AmendmentTable JSS ( engine_result ); // out: NetworkOPs, TransactionSign, Submit JSS ( engine_result_code ); // out: NetworkOPs, TransactionSign, Submit @@ -339,6 +340,7 @@ JSS ( source_currencies ); // in: PathRequest, RipplePathFind JSS ( stand_alone ); // out: NetworkOPs JSS ( start ); // in: TxHistory JSS ( state ); // out: Logic.h, ServerState, LedgerData +JSS ( state_accounting ); // out: NetworkOPs JSS ( state_now ); // in: Subscribe JSS ( status ); // error JSS ( stop ); // in: LedgerCleaner @@ -365,6 +367,7 @@ JSS ( transaction ); // in: Tx JSS ( transaction_hash ); // out: LedgerProposal, LedgerToJson JSS ( transactions ); // out: LedgerToJson, // in: AccountTx*, Unsubscribe +JSS ( transitions ); // out: NetworkOPs JSS ( treenode_cache_size ); // out: GetCounts JSS ( treenode_track_size ); // out: GetCounts JSS ( tx ); // out: STTx, AccountTx*