mirror of
https://github.com/XRPLF/rippled.git
synced 2025-11-20 11:05:54 +00:00
Implement new coroutines (RIPD-1043)
This commit is contained in:
committed by
Nik Bougalis
parent
880f354b90
commit
108906cb20
@@ -21,70 +21,277 @@
|
||||
#define RIPPLE_CORE_JOBQUEUE_H_INCLUDED
|
||||
|
||||
#include <ripple/core/JobTypes.h>
|
||||
#include <ripple/core/JobTypeData.h>
|
||||
#include <ripple/core/JobCoro.h>
|
||||
#include <ripple/json/json_value.h>
|
||||
#include <beast/insight/Collector.h>
|
||||
#include <beast/threads/Stoppable.h>
|
||||
#include <beast/module/core/thread/Workers.h>
|
||||
#include <boost/function.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
#include <thread>
|
||||
#include <set>
|
||||
|
||||
namespace ripple {
|
||||
|
||||
class Logs;
|
||||
|
||||
class JobQueue : public beast::Stoppable
|
||||
class JobQueue
|
||||
: public beast::Stoppable
|
||||
, private beast::Workers::Callback
|
||||
{
|
||||
protected:
|
||||
JobQueue (char const* name, Stoppable& parent);
|
||||
|
||||
public:
|
||||
using JobFunction = std::function <void (Job&)>;
|
||||
virtual ~JobQueue () { }
|
||||
using JobFunction = std::function <void(Job&)>;
|
||||
|
||||
virtual void addJob (
|
||||
JobType, std::string const& name, JobFunction const&) = 0;
|
||||
JobQueue (beast::insight::Collector::ptr const& collector,
|
||||
Stoppable& parent, beast::Journal journal, Logs& logs);
|
||||
~JobQueue ();
|
||||
|
||||
// Jobs waiting at this priority
|
||||
virtual int getJobCount (JobType t) const = 0;
|
||||
void addJob (JobType type, std::string const& name, JobFunction const& func);
|
||||
|
||||
// Jobs waiting plus running at this priority
|
||||
virtual int getJobCountTotal (JobType t) const = 0;
|
||||
/** Creates a coroutine and adds a job to the queue which will run it.
|
||||
|
||||
// All waiting jobs at or greater than this priority
|
||||
virtual int getJobCountGE (JobType t) const = 0;
|
||||
@param t The type of job.
|
||||
@param name Name of the job.
|
||||
@param f Has a signature of void(std::shared_ptr<JobCoro>). Called when the job executes.
|
||||
*/
|
||||
template <class F>
|
||||
void postCoro (JobType t, std::string const& name, F&& f);
|
||||
|
||||
virtual void shutdown () = 0;
|
||||
/** Jobs waiting at this priority.
|
||||
*/
|
||||
int getJobCount (JobType t) const;
|
||||
|
||||
virtual void setThreadCount (int c, bool const standaloneMode) = 0;
|
||||
/** Jobs waiting plus running at this priority.
|
||||
*/
|
||||
int getJobCountTotal (JobType t) const;
|
||||
|
||||
/** All waiting jobs at or greater than this priority.
|
||||
*/
|
||||
int getJobCountGE (JobType t) const;
|
||||
|
||||
/** Shut down the job queue without completing pending jobs.
|
||||
*/
|
||||
void shutdown ();
|
||||
|
||||
/** Set the number of thread serving the job queue to precisely this number.
|
||||
*/
|
||||
void setThreadCount (int c, bool const standaloneMode);
|
||||
|
||||
// VFALCO TODO Rename these to newLoadEventMeasurement or something similar
|
||||
// since they create the object.
|
||||
//
|
||||
virtual LoadEvent::pointer getLoadEvent (
|
||||
JobType t, std::string const& name) = 0;
|
||||
LoadEvent::pointer getLoadEvent (JobType t, std::string const& name);
|
||||
|
||||
// VFALCO TODO Why do we need two versions, one which returns a shared
|
||||
// pointer and the other which returns an autoptr?
|
||||
//
|
||||
virtual LoadEvent::autoptr getLoadEventAP (
|
||||
JobType t, std::string const& name) = 0;
|
||||
LoadEvent::autoptr getLoadEventAP (JobType t, std::string const& name);
|
||||
|
||||
// Add multiple load events
|
||||
virtual void addLoadEvents (
|
||||
JobType t, int count, std::chrono::milliseconds elapsed) = 0;
|
||||
/** Add multiple load events.
|
||||
*/
|
||||
void addLoadEvents (JobType t, int count, std::chrono::milliseconds elapsed);
|
||||
|
||||
virtual bool isOverloaded () = 0;
|
||||
// Cannot be const because LoadMonitor has no const methods.
|
||||
bool isOverloaded ();
|
||||
|
||||
/** Get the Job corresponding to a thread. If no thread, use the current
|
||||
thread. */
|
||||
virtual Job* getJobForThread (std::thread::id const& id = {}) const = 0;
|
||||
Job* getJobForThread(std::thread::id const& id = {}) const;
|
||||
|
||||
virtual Json::Value getJson (int c = 0) = 0;
|
||||
// Cannot be const because LoadMonitor has no const methods.
|
||||
Json::Value getJson (int c = 0);
|
||||
|
||||
private:
|
||||
using JobDataMap = std::map <JobType, JobTypeData>;
|
||||
|
||||
beast::Journal m_journal;
|
||||
mutable std::mutex m_mutex;
|
||||
std::uint64_t m_lastJob;
|
||||
std::set <Job> m_jobSet;
|
||||
JobDataMap m_jobData;
|
||||
JobTypeData m_invalidJobData;
|
||||
|
||||
std::map <std::thread::id, Job*> m_threadIds;
|
||||
|
||||
// The number of jobs currently in processTask()
|
||||
int m_processCount;
|
||||
|
||||
beast::Workers m_workers;
|
||||
Job::CancelCallback m_cancelCallback;
|
||||
|
||||
// Statistics tracking
|
||||
beast::insight::Collector::ptr m_collector;
|
||||
beast::insight::Gauge job_count;
|
||||
beast::insight::Hook hook;
|
||||
|
||||
static JobTypes const& getJobTypes()
|
||||
{
|
||||
static JobTypes types;
|
||||
return types;
|
||||
}
|
||||
|
||||
void collect();
|
||||
JobTypeData& getJobTypeData (JobType type);
|
||||
|
||||
// Signals the service stopped if the stopped condition is met.
|
||||
void checkStopped (std::lock_guard <std::mutex> const& lock);
|
||||
|
||||
// Signals an added Job for processing.
|
||||
//
|
||||
// Pre-conditions:
|
||||
// The JobType must be valid.
|
||||
// The Job must exist in mJobSet.
|
||||
// The Job must not have previously been queued.
|
||||
//
|
||||
// Post-conditions:
|
||||
// Count of waiting jobs of that type will be incremented.
|
||||
// If JobQueue exists, and has at least one thread, Job will eventually run.
|
||||
//
|
||||
// Invariants:
|
||||
// The calling thread owns the JobLock
|
||||
void queueJob (Job const& job, std::lock_guard <std::mutex> const& lock);
|
||||
|
||||
// Returns the next Job we should run now.
|
||||
//
|
||||
// RunnableJob:
|
||||
// A Job in the JobSet whose slots count for its type is greater than zero.
|
||||
//
|
||||
// Pre-conditions:
|
||||
// mJobSet must not be empty.
|
||||
// mJobSet holds at least one RunnableJob
|
||||
//
|
||||
// Post-conditions:
|
||||
// job is a valid Job object.
|
||||
// job is removed from mJobQueue.
|
||||
// Waiting job count of its type is decremented
|
||||
// Running job count of its type is incremented
|
||||
//
|
||||
// Invariants:
|
||||
// The calling thread owns the JobLock
|
||||
void getNextJob (Job& job);
|
||||
|
||||
// Indicates that a running Job has completed its task.
|
||||
//
|
||||
// Pre-conditions:
|
||||
// Job must not exist in mJobSet.
|
||||
// The JobType must not be invalid.
|
||||
//
|
||||
// Post-conditions:
|
||||
// The running count of that JobType is decremented
|
||||
// A new task is signaled if there are more waiting Jobs than the limit, if any.
|
||||
//
|
||||
// Invariants:
|
||||
// <none>
|
||||
void finishJob (Job const& job);
|
||||
|
||||
template <class Rep, class Period>
|
||||
void on_dequeue (JobType type,
|
||||
std::chrono::duration <Rep, Period> const& value);
|
||||
|
||||
template <class Rep, class Period>
|
||||
void on_execute (JobType type,
|
||||
std::chrono::duration <Rep, Period> const& value);
|
||||
|
||||
// Runs the next appropriate waiting Job.
|
||||
//
|
||||
// Pre-conditions:
|
||||
// A RunnableJob must exist in the JobSet
|
||||
//
|
||||
// Post-conditions:
|
||||
// The chosen RunnableJob will have Job::doJob() called.
|
||||
//
|
||||
// Invariants:
|
||||
// <none>
|
||||
void processTask () override;
|
||||
|
||||
// Returns `true` if all jobs of this type should be skipped when
|
||||
// the JobQueue receives a stop notification. If the job type isn't
|
||||
// skipped, the Job will be called and the job must call Job::shouldCancel
|
||||
// to determine if a long running or non-mandatory operation should be canceled.
|
||||
bool skipOnStop (JobType type);
|
||||
|
||||
// Returns the limit of running jobs for the given job type.
|
||||
// For jobs with no limit, we return the largest int. Hopefully that
|
||||
// will be enough.
|
||||
int getJobLimit (JobType type);
|
||||
|
||||
void onStop () override;
|
||||
void onChildrenStopped () override;
|
||||
};
|
||||
|
||||
std::unique_ptr <JobQueue>
|
||||
make_JobQueue (beast::insight::Collector::ptr const& collector,
|
||||
beast::Stoppable& parent, beast::Journal journal, Logs& logs);
|
||||
/*
|
||||
An RPC command is received and is handled via ServerHandler(HTTP) or
|
||||
Handler(websocket), depending on the connection type. The handler then calls
|
||||
the JobQueue::postCoro() method to create a coroutine and run it at a later
|
||||
point. This frees up the handler thread and allows it to continue handling
|
||||
other requests while the RPC command completes its work asynchronously.
|
||||
|
||||
postCoro() creates a JobCoro object. When the JobCoro ctor is called, and its
|
||||
coro_ member is initialized(a boost::coroutines::pull_type), execution
|
||||
automatically passes to the coroutine, which we don't want at this point,
|
||||
since we are still in the handler thread context. It's important to note here
|
||||
that construction of a boost pull_type automatically passes execution to the
|
||||
coroutine. A pull_type object automatically generates a push_type that is
|
||||
used as the as a parameter(do_yield) in the signature of the function the
|
||||
pull_type was created with. This function is immediately called during coro_
|
||||
construction and within it, JobCoro::yield_ is assigned the push_type
|
||||
parameter(do_yield) address and called(yield()) so we can return execution
|
||||
back to the caller's stack.
|
||||
|
||||
postCoro() then calls JobCoro::post(), which schedules a job on the job
|
||||
queue to continue execution of the coroutine in a JobQueue worker thread at
|
||||
some later time. When the job runs, we lock on the JobCoro::mutex_ and call
|
||||
coro_ which continues where we had left off. Since we the last thing we did
|
||||
in coro_ was call yield(), the next thing we continue with is calling the
|
||||
function param f, that was passed into JobCoro ctor. It is within this
|
||||
function body that the caller specifies what he would like to do while
|
||||
running in the coroutine and allow them to suspend and resume execution.
|
||||
A task that relies on other events to complete, such as path finding, calls
|
||||
JobCoro::yield() to suspend its execution while waiting on those events to
|
||||
complete and continue when signaled via the JobCoro::post() method.
|
||||
|
||||
There is a potential race condition that exists here where post() can get
|
||||
called before yield() after f is called. Technically the problem only occurs
|
||||
if the job that post() scheduled is executed before yield() is called.
|
||||
If the post() job were to be executed before yield(), undefined behavior
|
||||
would occur. The lock ensures that coro_ is not called again until we exit
|
||||
the coroutine. At which point a scheduled resume() job waiting on the lock
|
||||
would gain entry, harmlessly call coro_ and immediately return as we have
|
||||
already completed the coroutine.
|
||||
|
||||
The race condition occurs as follows:
|
||||
|
||||
1- The coroutine is running.
|
||||
2- The coroutine is about to suspend, but before it can do so, it must
|
||||
arrange for some event to wake it up.
|
||||
3- The coroutine arranges for some event to wake it up.
|
||||
4- Before the coroutine can suspend, that event occurs and the resumption
|
||||
of the coroutine is scheduled on the job queue.
|
||||
5- Again, before the coroutine can suspend, the resumption of the coroutine
|
||||
is dispatched.
|
||||
6- Again, before the coroutine can suspend, the resumption code runs the
|
||||
coroutine.
|
||||
The coroutine is now running in two threads.
|
||||
|
||||
The lock prevents this from happening as step 6 will block until the
|
||||
lock is released which only happens after the coroutine completes.
|
||||
*/
|
||||
|
||||
} // ripple
|
||||
|
||||
#include <ripple/core/JobCoro.ipp>
|
||||
|
||||
namespace ripple {
|
||||
|
||||
template <class F>
|
||||
void JobQueue::postCoro (JobType t, std::string const& name, F&& f)
|
||||
{
|
||||
/* First param is a detail type to make construction private.
|
||||
Last param is the function the coroutine runs. Signature of
|
||||
void(std::shared_ptr<JobCoro>).
|
||||
*/
|
||||
auto const coro = std::make_shared<JobCoro>(
|
||||
detail::JobCoro_create_t{}, *this, t, name, std::forward<F>(f));
|
||||
coro->post();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user