mirror of
https://github.com/XRPLF/rippled.git
synced 2025-12-06 17:27:55 +00:00
Performance logging and counters:
* Tally and duration counters for Job Queue tasks and RPC calls
optionally rendered by server_info and server_state, and
optionally printed to a distinct log file.
- Tally each Job Queue task as it is queued, starts, and
finishes running. Track total duration queued and running.
- Tally each RPC call as it starts and either finishes
successfully or throws an exception. Track total running
duration for each.
* Track currently executing Job Queue tasks and RPC methods
along with durations.
* Json-formatted performance log file written by a dedicated
thread, for above-described data.
* New optional parameter, "counters", for server_info and
server_state. If set, render Job Queue and RPC call counters
as well as currently executing tasks.
* New configuration section, "[perf]", to optionally control
performance logging to a file.
* Support optional sub-second periods when rendering human-readable
time points.
This commit is contained in:
committed by
Nikolaos D. Bougalis
parent
ef3bc92b82
commit
8eb8c77886
@@ -89,7 +89,9 @@ public:
|
||||
|
||||
private:
|
||||
boost::filesystem::path CONFIG_FILE;
|
||||
public:
|
||||
boost::filesystem::path CONFIG_DIR;
|
||||
private:
|
||||
boost::filesystem::path DEBUG_LOGFILE;
|
||||
|
||||
void load ();
|
||||
|
||||
@@ -31,6 +31,11 @@
|
||||
|
||||
namespace ripple {
|
||||
|
||||
namespace perf
|
||||
{
|
||||
class PerfLog;
|
||||
}
|
||||
|
||||
class Logs;
|
||||
struct Coro_create_t
|
||||
{
|
||||
@@ -132,7 +137,8 @@ public:
|
||||
using JobFunction = std::function <void(Job&)>;
|
||||
|
||||
JobQueue (beast::insight::Collector::ptr const& collector,
|
||||
Stoppable& parent, beast::Journal journal, Logs& logs);
|
||||
Stoppable& parent, beast::Journal journal, Logs& logs,
|
||||
perf::PerfLog& perfLog);
|
||||
~JobQueue ();
|
||||
|
||||
/** Adds a job to the JobQueue.
|
||||
@@ -226,18 +232,13 @@ private:
|
||||
Job::CancelCallback m_cancelCallback;
|
||||
|
||||
// Statistics tracking
|
||||
perf::PerfLog& perfLog_;
|
||||
beast::insight::Collector::ptr m_collector;
|
||||
beast::insight::Gauge job_count;
|
||||
beast::insight::Hook hook;
|
||||
|
||||
std::condition_variable cv_;
|
||||
|
||||
static JobTypes const& getJobTypes()
|
||||
{
|
||||
static JobTypes types;
|
||||
return types;
|
||||
}
|
||||
|
||||
void collect();
|
||||
JobTypeData& getJobTypeData (JobType type);
|
||||
|
||||
@@ -304,14 +305,6 @@ private:
|
||||
// <none>
|
||||
void finishJob (JobType type);
|
||||
|
||||
template <class Rep, class Period>
|
||||
void on_dequeue (JobType type,
|
||||
std::chrono::duration <Rep, Period> const& value);
|
||||
|
||||
template <class Rep, class Period>
|
||||
void on_execute (JobType type,
|
||||
std::chrono::duration <Rep, Period> const& value);
|
||||
|
||||
// Runs the next appropriate waiting Job.
|
||||
//
|
||||
// Pre-conditions:
|
||||
@@ -322,7 +315,7 @@ private:
|
||||
//
|
||||
// Invariants:
|
||||
// <none>
|
||||
void processTask () override;
|
||||
void processTask (int instance) override;
|
||||
|
||||
// Returns the limit of running jobs for the given job type.
|
||||
// For jobs with no limit, we return the largest int. Hopefully that
|
||||
|
||||
@@ -48,7 +48,7 @@ public:
|
||||
bool special, std::chrono::milliseconds avgLatency,
|
||||
std::chrono::milliseconds peakLatency)
|
||||
: m_type (type)
|
||||
, m_name (name)
|
||||
, m_name (std::move(name))
|
||||
, m_limit (limit)
|
||||
, m_special (special)
|
||||
, m_avgLatency (avgLatency)
|
||||
@@ -62,7 +62,7 @@ public:
|
||||
return m_type;
|
||||
}
|
||||
|
||||
std::string name () const
|
||||
std::string const& name () const
|
||||
{
|
||||
return m_name;
|
||||
}
|
||||
|
||||
@@ -23,6 +23,9 @@
|
||||
#include <ripple/core/Job.h>
|
||||
#include <ripple/core/JobTypeInfo.h>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace ripple
|
||||
{
|
||||
@@ -33,6 +36,7 @@ public:
|
||||
using Map = std::map <JobType, JobTypeInfo>;
|
||||
using const_iterator = Map::const_iterator;
|
||||
|
||||
private:
|
||||
JobTypes ()
|
||||
: m_unknown (jtINVALID, "invalid", 0, true, std::chrono::milliseconds{0},
|
||||
std::chrono::milliseconds{0})
|
||||
@@ -56,7 +60,7 @@ add( jtADVANCE, "advanceLedger", maxLimit, false, 0ms, 0m
|
||||
add( jtPUBLEDGER, "publishNewLedger", maxLimit, false, 3000ms, 4500ms);
|
||||
add( jtTXN_DATA, "fetchTxnData", 1, false, 0ms, 0ms);
|
||||
add( jtWAL, "writeAhead", maxLimit, false, 1000ms, 2500ms);
|
||||
add( jtVALIDATION_t, "trustedValidation", maxLimit, false, 500ms, 1500ms);
|
||||
add( jtVALIDATION_t, "trustedValidation", maxLimit, false, 500ms, 1500ms);
|
||||
add( jtWRITE, "writeObjects", maxLimit, false, 1750ms, 2500ms);
|
||||
add( jtACCEPT, "acceptLedger", maxLimit, false, 0ms, 0ms);
|
||||
add( jtPROPOSAL_t, "trustedProposal", maxLimit, false, 100ms, 500ms);
|
||||
@@ -79,6 +83,13 @@ add( jtNS_WRITE, "WriteNode", 0, true, 0ms, 0m
|
||||
|
||||
}
|
||||
|
||||
public:
|
||||
static JobTypes const& instance()
|
||||
{
|
||||
static JobTypes const types;
|
||||
return types;
|
||||
}
|
||||
|
||||
JobTypeInfo const& get (JobType jt) const
|
||||
{
|
||||
Map::const_iterator const iter (m_map.find (jt));
|
||||
@@ -95,6 +106,11 @@ add( jtNS_WRITE, "WriteNode", 0, true, 0ms, 0m
|
||||
return m_unknown;
|
||||
}
|
||||
|
||||
Map::size_type size () const
|
||||
{
|
||||
return m_map.size();
|
||||
}
|
||||
|
||||
const_iterator begin () const
|
||||
{
|
||||
return m_map.cbegin ();
|
||||
|
||||
@@ -130,7 +130,7 @@ class RootStoppable;
|
||||
For stoppables which are only considered stopped when all of their
|
||||
children have stopped, and their own internal logic indicates a stop, it
|
||||
will be necessary to perform special actions in onChildrenStopped(). The
|
||||
funtion areChildrenStopped() can be used after children have stopped,
|
||||
function areChildrenStopped() can be used after children have stopped,
|
||||
but before the Stoppable logic itself has stopped, to determine if the
|
||||
stoppable's logic is a true stop.
|
||||
|
||||
|
||||
@@ -19,18 +19,21 @@
|
||||
|
||||
#include <ripple/core/JobQueue.h>
|
||||
#include <ripple/basics/contract.h>
|
||||
#include <ripple/basics/PerfLog.h>
|
||||
|
||||
namespace ripple {
|
||||
|
||||
JobQueue::JobQueue (beast::insight::Collector::ptr const& collector,
|
||||
Stoppable& parent, beast::Journal journal, Logs& logs)
|
||||
Stoppable& parent, beast::Journal journal, Logs& logs,
|
||||
perf::PerfLog& perfLog)
|
||||
: Stoppable ("JobQueue", parent)
|
||||
, m_journal (journal)
|
||||
, m_lastJob (0)
|
||||
, m_invalidJobData (getJobTypes ().getInvalid (), collector, logs)
|
||||
, m_invalidJobData (JobTypes::instance().getInvalid (), collector, logs)
|
||||
, m_processCount (0)
|
||||
, m_workers (*this, "JobQueue", 0)
|
||||
, m_workers (*this, perfLog, "JobQueue", 0)
|
||||
, m_cancelCallback (std::bind (&Stoppable::isStopping, this))
|
||||
, perfLog_ (perfLog)
|
||||
, m_collector (collector)
|
||||
{
|
||||
hook = m_collector->make_hook (std::bind (&JobQueue::collect, this));
|
||||
@@ -39,7 +42,7 @@ JobQueue::JobQueue (beast::insight::Collector::ptr const& collector,
|
||||
{
|
||||
std::lock_guard <std::mutex> lock (m_mutex);
|
||||
|
||||
for (auto const& x : getJobTypes ())
|
||||
for (auto const& x : JobTypes::instance())
|
||||
{
|
||||
JobTypeInfo const& jt = x.second;
|
||||
|
||||
@@ -329,6 +332,7 @@ JobQueue::queueJob (Job const& job, std::lock_guard <std::mutex> const& lock)
|
||||
JobType const type (job.getType ());
|
||||
assert (type != jtINVALID);
|
||||
assert (m_jobSet.find (job) != m_jobSet.end ());
|
||||
perfLog_.jobQueue(type);
|
||||
|
||||
JobTypeData& data (getJobTypeData (type));
|
||||
|
||||
@@ -398,34 +402,13 @@ JobQueue::finishJob (JobType type)
|
||||
--data.running;
|
||||
}
|
||||
|
||||
template <class Rep, class Period>
|
||||
void JobQueue::on_dequeue (JobType type,
|
||||
std::chrono::duration <Rep, Period> const& value)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
auto const ms = ceil<milliseconds>(value);
|
||||
|
||||
if (ms >= 10ms)
|
||||
getJobTypeData (type).dequeue.notify (ms);
|
||||
}
|
||||
|
||||
template <class Rep, class Period>
|
||||
void JobQueue::on_execute (JobType type,
|
||||
std::chrono::duration <Rep, Period> const& value)
|
||||
{
|
||||
using namespace std::chrono;
|
||||
auto const ms (ceil <milliseconds> (value));
|
||||
|
||||
if (ms >= 10ms)
|
||||
getJobTypeData (type).execute.notify (ms);
|
||||
}
|
||||
|
||||
void
|
||||
JobQueue::processTask ()
|
||||
JobQueue::processTask (int instance)
|
||||
{
|
||||
JobType type;
|
||||
|
||||
{
|
||||
using namespace std::chrono;
|
||||
Job::clock_type::time_point const start_time (
|
||||
Job::clock_type::now());
|
||||
{
|
||||
@@ -438,10 +421,18 @@ JobQueue::processTask ()
|
||||
type = job.getType();
|
||||
JobTypeData& data(getJobTypeData(type));
|
||||
JLOG(m_journal.trace()) << "Doing " << data.name () << " job";
|
||||
on_dequeue (job.getType (), start_time - job.queue_time ());
|
||||
auto const us = date::ceil<microseconds>(
|
||||
start_time - job.queue_time());
|
||||
perfLog_.jobStart(type, us, start_time, instance);
|
||||
if (us >= 10ms)
|
||||
getJobTypeData(type).dequeue.notify(us);
|
||||
job.doJob ();
|
||||
}
|
||||
on_execute(type, Job::clock_type::now() - start_time);
|
||||
auto const us (
|
||||
date::ceil<microseconds>(Job::clock_type::now() - start_time));
|
||||
perfLog_.jobFinish(type, us, instance);
|
||||
if (us >= 10ms)
|
||||
getJobTypeData(type).execute.notify(us);
|
||||
}
|
||||
|
||||
{
|
||||
@@ -462,7 +453,7 @@ JobQueue::processTask ()
|
||||
int
|
||||
JobQueue::getJobLimit (JobType type)
|
||||
{
|
||||
JobTypeInfo const& j (getJobTypes ().get (type));
|
||||
JobTypeInfo const& j (JobTypes::instance().get (type));
|
||||
assert (j.type () != jtINVALID);
|
||||
|
||||
return j.limit ();
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
|
||||
#include <ripple/basics/Log.h>
|
||||
#include <ripple/basics/UptimeTimer.h>
|
||||
#include <ripple/beast/clock/chrono_util.h>
|
||||
#include <ripple/basics/date.h>
|
||||
#include <ripple/core/LoadMonitor.h>
|
||||
|
||||
namespace ripple {
|
||||
@@ -108,14 +108,15 @@ void LoadMonitor::addLoadSample (LoadEvent const& s)
|
||||
|
||||
auto const total = s.runTime() + s.waitTime();
|
||||
// Don't include "jitter" as part of the latency
|
||||
auto const latency = total < 2ms ? 0ms : round<milliseconds>(total);
|
||||
auto const latency = total < 2ms ? 0ms : date::round<milliseconds>(total);
|
||||
|
||||
if (latency > 500ms)
|
||||
{
|
||||
auto mj = (latency > 1s) ? j_.warn() : j_.info();
|
||||
JLOG (mj) << "Job: " << s.name() <<
|
||||
" run: " << round<milliseconds>(s.runTime()).count() << "ms" <<
|
||||
" wait: " << round<milliseconds>(s.waitTime()).count() << "ms";
|
||||
" run: " << date::round<milliseconds>(s.runTime()).count() <<
|
||||
"ms" << " wait: " <<
|
||||
date::round<milliseconds>(s.waitTime()).count() << "ms";
|
||||
}
|
||||
|
||||
addSamples (1, latency);
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
//==============================================================================
|
||||
|
||||
#include <ripple/core/impl/Workers.h>
|
||||
#include <ripple/basics/PerfLog.h>
|
||||
#include <ripple/beast/core/CurrentThreadName.h>
|
||||
#include <cassert>
|
||||
|
||||
@@ -25,9 +26,11 @@ namespace ripple {
|
||||
|
||||
Workers::Workers (
|
||||
Callback& callback,
|
||||
perf::PerfLog& perfLog,
|
||||
std::string const& threadNames,
|
||||
int numberOfThreads)
|
||||
: m_callback (callback)
|
||||
, perfLog_ (perfLog)
|
||||
, m_threadNames (threadNames)
|
||||
, m_allPaused (true, true)
|
||||
, m_semaphore (0)
|
||||
@@ -57,12 +60,14 @@ int Workers::getNumberOfThreads () const noexcept
|
||||
//
|
||||
void Workers::setNumberOfThreads (int numberOfThreads)
|
||||
{
|
||||
static int instance {0};
|
||||
if (m_numberOfThreads != numberOfThreads)
|
||||
{
|
||||
perfLog_.resizeJobs(numberOfThreads);
|
||||
|
||||
if (numberOfThreads > m_numberOfThreads)
|
||||
{
|
||||
// Increasing the number of working threads
|
||||
|
||||
int const amount = numberOfThreads - m_numberOfThreads;
|
||||
|
||||
for (int i = 0; i < amount; ++i)
|
||||
@@ -79,15 +84,14 @@ void Workers::setNumberOfThreads (int numberOfThreads)
|
||||
}
|
||||
else
|
||||
{
|
||||
worker = new Worker (*this, m_threadNames);
|
||||
worker = new Worker (*this, m_threadNames, instance++);
|
||||
m_everyone.push_front (worker);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (numberOfThreads < m_numberOfThreads)
|
||||
else
|
||||
{
|
||||
// Decreasing the number of working threads
|
||||
|
||||
int const amount = m_numberOfThreads - numberOfThreads;
|
||||
|
||||
for (int i = 0; i < amount; ++i)
|
||||
@@ -142,9 +146,11 @@ void Workers::deleteWorkers (beast::LockFreeStack <Worker>& stack)
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
Workers::Worker::Worker (Workers& workers, std::string const& threadName)
|
||||
Workers::Worker::Worker (Workers& workers, std::string const& threadName,
|
||||
int const instance)
|
||||
: m_workers {workers}
|
||||
, threadName_ {threadName}
|
||||
, instance_ {instance}
|
||||
, wakeCount_ {0}
|
||||
, shouldExit_ {false}
|
||||
{
|
||||
@@ -216,7 +222,7 @@ void Workers::Worker::run ()
|
||||
// unblocked in order to process a task.
|
||||
//
|
||||
++m_workers.m_runningTaskCount;
|
||||
m_workers.m_callback.processTask ();
|
||||
m_workers.m_callback.processTask (instance_);
|
||||
--m_workers.m_runningTaskCount;
|
||||
}
|
||||
|
||||
|
||||
@@ -31,6 +31,11 @@
|
||||
|
||||
namespace ripple {
|
||||
|
||||
namespace perf
|
||||
{
|
||||
class PerfLog;
|
||||
}
|
||||
|
||||
/** A group of threads that process tasks.
|
||||
*/
|
||||
class Workers
|
||||
@@ -45,9 +50,11 @@ public:
|
||||
that you only process one task from inside your callback. Each
|
||||
call to addTask will result in exactly one call to processTask.
|
||||
|
||||
@param instance The worker thread instance.
|
||||
|
||||
@see Workers::addTask
|
||||
*/
|
||||
virtual void processTask () = 0;
|
||||
virtual void processTask (int instance) = 0;
|
||||
};
|
||||
|
||||
/** Create the object.
|
||||
@@ -58,6 +65,7 @@ public:
|
||||
@param threadNames The name given to each created worker thread.
|
||||
*/
|
||||
explicit Workers (Callback& callback,
|
||||
perf::PerfLog& perfLog,
|
||||
std::string const& threadNames = "Worker",
|
||||
int numberOfThreads =
|
||||
static_cast<int>(std::thread::hardware_concurrency()));
|
||||
@@ -126,7 +134,9 @@ private:
|
||||
, public beast::LockFreeStack <Worker, PausedTag>::Node
|
||||
{
|
||||
public:
|
||||
Worker (Workers& workers, std::string const& threadName);
|
||||
Worker (Workers& workers,
|
||||
std::string const& threadName,
|
||||
int const instance);
|
||||
|
||||
~Worker ();
|
||||
|
||||
@@ -138,6 +148,7 @@ private:
|
||||
private:
|
||||
Workers& m_workers;
|
||||
std::string const threadName_;
|
||||
int const instance_;
|
||||
|
||||
std::thread thread_;
|
||||
std::mutex mutex_;
|
||||
@@ -151,6 +162,7 @@ private:
|
||||
|
||||
private:
|
||||
Callback& m_callback;
|
||||
perf::PerfLog& perfLog_;
|
||||
std::string m_threadNames; // The name to give each thread
|
||||
beast::WaitableEvent m_allPaused; // signaled when all threads paused
|
||||
semaphore m_semaphore; // each pending task is 1 resource
|
||||
|
||||
Reference in New Issue
Block a user