Performance logging and counters:

  * Tally and duration counters for Job Queue tasks and RPC calls,
    optionally rendered by server_info and server_state, and
    optionally printed to a distinct log file.
    - Tally each Job Queue task as it is queued, starts, and
      finishes running. Track total duration queued and running.
    - Tally each RPC call as it starts and either finishes
      successfully or throws an exception. Track total running
      duration for each.
    - Tally each Job Queue task as it is queued, starts, and
      finishes running. Track total duration queued and running.
    - Tally each RPC call as it starts and either finishes
      successfully or throws an exception. Track total running
      duration for each.
  * Track currently executing Job Queue tasks and RPC methods
    along with durations.
  * JSON-formatted performance log file, written by a dedicated
    thread, containing the data described above.
  * New optional parameter, "counters", for server_info and
    server_state. If set, render Job Queue and RPC call counters
    as well as currently executing tasks.
  * New configuration section, "[perf]", to optionally control
    performance logging to a file.
  * Support optional sub-second periods when rendering human-readable
    time points.
This commit is contained in:
Mark Travis
2018-01-13 04:02:43 -08:00
committed by Nikolaos D. Bougalis
parent ef3bc92b82
commit 8eb8c77886
45 changed files with 10379 additions and 577 deletions

View File

@@ -89,7 +89,9 @@ public:
private:
boost::filesystem::path CONFIG_FILE;
public:
boost::filesystem::path CONFIG_DIR;
private:
boost::filesystem::path DEBUG_LOGFILE;
void load ();

View File

@@ -31,6 +31,11 @@
namespace ripple {
namespace perf
{
class PerfLog;
}
class Logs;
struct Coro_create_t
{
@@ -132,7 +137,8 @@ public:
using JobFunction = std::function <void(Job&)>;
JobQueue (beast::insight::Collector::ptr const& collector,
Stoppable& parent, beast::Journal journal, Logs& logs);
Stoppable& parent, beast::Journal journal, Logs& logs,
perf::PerfLog& perfLog);
~JobQueue ();
/** Adds a job to the JobQueue.
@@ -226,18 +232,13 @@ private:
Job::CancelCallback m_cancelCallback;
// Statistics tracking
perf::PerfLog& perfLog_;
beast::insight::Collector::ptr m_collector;
beast::insight::Gauge job_count;
beast::insight::Hook hook;
std::condition_variable cv_;
static JobTypes const& getJobTypes()
{
static JobTypes types;
return types;
}
void collect();
JobTypeData& getJobTypeData (JobType type);
@@ -304,14 +305,6 @@ private:
// <none>
void finishJob (JobType type);
template <class Rep, class Period>
void on_dequeue (JobType type,
std::chrono::duration <Rep, Period> const& value);
template <class Rep, class Period>
void on_execute (JobType type,
std::chrono::duration <Rep, Period> const& value);
// Runs the next appropriate waiting Job.
//
// Pre-conditions:
@@ -322,7 +315,7 @@ private:
//
// Invariants:
// <none>
void processTask () override;
void processTask (int instance) override;
// Returns the limit of running jobs for the given job type.
// For jobs with no limit, we return the largest int. Hopefully that

View File

@@ -48,7 +48,7 @@ public:
bool special, std::chrono::milliseconds avgLatency,
std::chrono::milliseconds peakLatency)
: m_type (type)
, m_name (name)
, m_name (std::move(name))
, m_limit (limit)
, m_special (special)
, m_avgLatency (avgLatency)
@@ -62,7 +62,7 @@ public:
return m_type;
}
std::string name () const
std::string const& name () const
{
return m_name;
}

View File

@@ -23,6 +23,9 @@
#include <ripple/core/Job.h>
#include <ripple/core/JobTypeInfo.h>
#include <map>
#include <string>
#include <type_traits>
#include <unordered_map>
namespace ripple
{
@@ -33,6 +36,7 @@ public:
using Map = std::map <JobType, JobTypeInfo>;
using const_iterator = Map::const_iterator;
private:
JobTypes ()
: m_unknown (jtINVALID, "invalid", 0, true, std::chrono::milliseconds{0},
std::chrono::milliseconds{0})
@@ -56,7 +60,7 @@ add( jtADVANCE, "advanceLedger", maxLimit, false, 0ms, 0m
add( jtPUBLEDGER, "publishNewLedger", maxLimit, false, 3000ms, 4500ms);
add( jtTXN_DATA, "fetchTxnData", 1, false, 0ms, 0ms);
add( jtWAL, "writeAhead", maxLimit, false, 1000ms, 2500ms);
add( jtVALIDATION_t, "trustedValidation", maxLimit, false, 500ms, 1500ms);
add( jtVALIDATION_t, "trustedValidation", maxLimit, false, 500ms, 1500ms);
add( jtWRITE, "writeObjects", maxLimit, false, 1750ms, 2500ms);
add( jtACCEPT, "acceptLedger", maxLimit, false, 0ms, 0ms);
add( jtPROPOSAL_t, "trustedProposal", maxLimit, false, 100ms, 500ms);
@@ -79,6 +83,13 @@ add( jtNS_WRITE, "WriteNode", 0, true, 0ms, 0m
}
public:
static JobTypes const& instance()
{
static JobTypes const types;
return types;
}
JobTypeInfo const& get (JobType jt) const
{
Map::const_iterator const iter (m_map.find (jt));
@@ -95,6 +106,11 @@ add( jtNS_WRITE, "WriteNode", 0, true, 0ms, 0m
return m_unknown;
}
Map::size_type size () const
{
return m_map.size();
}
const_iterator begin () const
{
return m_map.cbegin ();

View File

@@ -130,7 +130,7 @@ class RootStoppable;
For stoppables which are only considered stopped when all of their
children have stopped, and their own internal logic indicates a stop, it
will be necessary to perform special actions in onChildrenStopped(). The
funtion areChildrenStopped() can be used after children have stopped,
function areChildrenStopped() can be used after children have stopped,
but before the Stoppable logic itself has stopped, to determine if the
stoppable's logic is a true stop.

View File

@@ -19,18 +19,21 @@
#include <ripple/core/JobQueue.h>
#include <ripple/basics/contract.h>
#include <ripple/basics/PerfLog.h>
namespace ripple {
JobQueue::JobQueue (beast::insight::Collector::ptr const& collector,
Stoppable& parent, beast::Journal journal, Logs& logs)
Stoppable& parent, beast::Journal journal, Logs& logs,
perf::PerfLog& perfLog)
: Stoppable ("JobQueue", parent)
, m_journal (journal)
, m_lastJob (0)
, m_invalidJobData (getJobTypes ().getInvalid (), collector, logs)
, m_invalidJobData (JobTypes::instance().getInvalid (), collector, logs)
, m_processCount (0)
, m_workers (*this, "JobQueue", 0)
, m_workers (*this, perfLog, "JobQueue", 0)
, m_cancelCallback (std::bind (&Stoppable::isStopping, this))
, perfLog_ (perfLog)
, m_collector (collector)
{
hook = m_collector->make_hook (std::bind (&JobQueue::collect, this));
@@ -39,7 +42,7 @@ JobQueue::JobQueue (beast::insight::Collector::ptr const& collector,
{
std::lock_guard <std::mutex> lock (m_mutex);
for (auto const& x : getJobTypes ())
for (auto const& x : JobTypes::instance())
{
JobTypeInfo const& jt = x.second;
@@ -329,6 +332,7 @@ JobQueue::queueJob (Job const& job, std::lock_guard <std::mutex> const& lock)
JobType const type (job.getType ());
assert (type != jtINVALID);
assert (m_jobSet.find (job) != m_jobSet.end ());
perfLog_.jobQueue(type);
JobTypeData& data (getJobTypeData (type));
@@ -398,34 +402,13 @@ JobQueue::finishJob (JobType type)
--data.running;
}
template <class Rep, class Period>
void JobQueue::on_dequeue (JobType type,
std::chrono::duration <Rep, Period> const& value)
{
using namespace std::chrono;
auto const ms = ceil<milliseconds>(value);
if (ms >= 10ms)
getJobTypeData (type).dequeue.notify (ms);
}
template <class Rep, class Period>
void JobQueue::on_execute (JobType type,
std::chrono::duration <Rep, Period> const& value)
{
using namespace std::chrono;
auto const ms (ceil <milliseconds> (value));
if (ms >= 10ms)
getJobTypeData (type).execute.notify (ms);
}
void
JobQueue::processTask ()
JobQueue::processTask (int instance)
{
JobType type;
{
using namespace std::chrono;
Job::clock_type::time_point const start_time (
Job::clock_type::now());
{
@@ -438,10 +421,18 @@ JobQueue::processTask ()
type = job.getType();
JobTypeData& data(getJobTypeData(type));
JLOG(m_journal.trace()) << "Doing " << data.name () << " job";
on_dequeue (job.getType (), start_time - job.queue_time ());
auto const us = date::ceil<microseconds>(
start_time - job.queue_time());
perfLog_.jobStart(type, us, start_time, instance);
if (us >= 10ms)
getJobTypeData(type).dequeue.notify(us);
job.doJob ();
}
on_execute(type, Job::clock_type::now() - start_time);
auto const us (
date::ceil<microseconds>(Job::clock_type::now() - start_time));
perfLog_.jobFinish(type, us, instance);
if (us >= 10ms)
getJobTypeData(type).execute.notify(us);
}
{
@@ -462,7 +453,7 @@ JobQueue::processTask ()
int
JobQueue::getJobLimit (JobType type)
{
JobTypeInfo const& j (getJobTypes ().get (type));
JobTypeInfo const& j (JobTypes::instance().get (type));
assert (j.type () != jtINVALID);
return j.limit ();

View File

@@ -19,7 +19,7 @@
#include <ripple/basics/Log.h>
#include <ripple/basics/UptimeTimer.h>
#include <ripple/beast/clock/chrono_util.h>
#include <ripple/basics/date.h>
#include <ripple/core/LoadMonitor.h>
namespace ripple {
@@ -108,14 +108,15 @@ void LoadMonitor::addLoadSample (LoadEvent const& s)
auto const total = s.runTime() + s.waitTime();
// Don't include "jitter" as part of the latency
auto const latency = total < 2ms ? 0ms : round<milliseconds>(total);
auto const latency = total < 2ms ? 0ms : date::round<milliseconds>(total);
if (latency > 500ms)
{
auto mj = (latency > 1s) ? j_.warn() : j_.info();
JLOG (mj) << "Job: " << s.name() <<
" run: " << round<milliseconds>(s.runTime()).count() << "ms" <<
" wait: " << round<milliseconds>(s.waitTime()).count() << "ms";
" run: " << date::round<milliseconds>(s.runTime()).count() <<
"ms" << " wait: " <<
date::round<milliseconds>(s.waitTime()).count() << "ms";
}
addSamples (1, latency);

View File

@@ -18,6 +18,7 @@
//==============================================================================
#include <ripple/core/impl/Workers.h>
#include <ripple/basics/PerfLog.h>
#include <ripple/beast/core/CurrentThreadName.h>
#include <cassert>
@@ -25,9 +26,11 @@ namespace ripple {
Workers::Workers (
Callback& callback,
perf::PerfLog& perfLog,
std::string const& threadNames,
int numberOfThreads)
: m_callback (callback)
, perfLog_ (perfLog)
, m_threadNames (threadNames)
, m_allPaused (true, true)
, m_semaphore (0)
@@ -57,12 +60,14 @@ int Workers::getNumberOfThreads () const noexcept
//
void Workers::setNumberOfThreads (int numberOfThreads)
{
static int instance {0};
if (m_numberOfThreads != numberOfThreads)
{
perfLog_.resizeJobs(numberOfThreads);
if (numberOfThreads > m_numberOfThreads)
{
// Increasing the number of working threads
int const amount = numberOfThreads - m_numberOfThreads;
for (int i = 0; i < amount; ++i)
@@ -79,15 +84,14 @@ void Workers::setNumberOfThreads (int numberOfThreads)
}
else
{
worker = new Worker (*this, m_threadNames);
worker = new Worker (*this, m_threadNames, instance++);
m_everyone.push_front (worker);
}
}
}
else if (numberOfThreads < m_numberOfThreads)
else
{
// Decreasing the number of working threads
int const amount = m_numberOfThreads - numberOfThreads;
for (int i = 0; i < amount; ++i)
@@ -142,9 +146,11 @@ void Workers::deleteWorkers (beast::LockFreeStack <Worker>& stack)
//------------------------------------------------------------------------------
Workers::Worker::Worker (Workers& workers, std::string const& threadName)
Workers::Worker::Worker (Workers& workers, std::string const& threadName,
int const instance)
: m_workers {workers}
, threadName_ {threadName}
, instance_ {instance}
, wakeCount_ {0}
, shouldExit_ {false}
{
@@ -216,7 +222,7 @@ void Workers::Worker::run ()
// unblocked in order to process a task.
//
++m_workers.m_runningTaskCount;
m_workers.m_callback.processTask ();
m_workers.m_callback.processTask (instance_);
--m_workers.m_runningTaskCount;
}

View File

@@ -31,6 +31,11 @@
namespace ripple {
namespace perf
{
class PerfLog;
}
/** A group of threads that process tasks.
*/
class Workers
@@ -45,9 +50,11 @@ public:
that you only process one task from inside your callback. Each
call to addTask will result in exactly one call to processTask.
@param instance The worker thread instance.
@see Workers::addTask
*/
virtual void processTask () = 0;
virtual void processTask (int instance) = 0;
};
/** Create the object.
@@ -58,6 +65,7 @@ public:
@param threadNames The name given to each created worker thread.
*/
explicit Workers (Callback& callback,
perf::PerfLog& perfLog,
std::string const& threadNames = "Worker",
int numberOfThreads =
static_cast<int>(std::thread::hardware_concurrency()));
@@ -126,7 +134,9 @@ private:
, public beast::LockFreeStack <Worker, PausedTag>::Node
{
public:
Worker (Workers& workers, std::string const& threadName);
Worker (Workers& workers,
std::string const& threadName,
int const instance);
~Worker ();
@@ -138,6 +148,7 @@ private:
private:
Workers& m_workers;
std::string const threadName_;
int const instance_;
std::thread thread_;
std::mutex mutex_;
@@ -151,6 +162,7 @@ private:
private:
Callback& m_callback;
perf::PerfLog& perfLog_;
std::string m_threadNames; // The name to give each thread
beast::WaitableEvent m_allPaused; // signaled when all threads paused
semaphore m_semaphore; // each pending task is 1 resource