rippled
JobQueue.cpp
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2012, 2013 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #include <ripple/basics/PerfLog.h>
21 #include <ripple/basics/contract.h>
22 #include <ripple/core/JobQueue.h>
23 
24 namespace ripple {
25 
27  beast::insight::Collector::ptr const& collector,
28  Stoppable& parent,
29  beast::Journal journal,
30  Logs& logs,
31  perf::PerfLog& perfLog)
32  : Stoppable("JobQueue", parent)
33  , m_journal(journal)
34  , m_lastJob(0)
35  , m_invalidJobData(JobTypes::instance().getInvalid(), collector, logs)
36  , m_processCount(0)
37  , m_workers(*this, &perfLog, "JobQueue", 0)
38  , m_cancelCallback(std::bind(&Stoppable::isStopping, this))
39  , perfLog_(perfLog)
40  , m_collector(collector)
41 {
 // JobQueue constructor body.
 // NOTE(review): the opening signature line and a few statements inside
 // this body are absent from this listing; comments below describe only
 // the statements that are visible.
 //
 // Register collect() as the collector's polling hook and create the
 // "job_count" gauge that collect() assigns to.
42  hook = m_collector->make_hook(std::bind(&JobQueue::collect, this));
43  job_count = m_collector->make_gauge("job_count");
44 
45  {
47 
 // Build one m_jobData entry for every job type known to JobTypes.
48  for (auto const& x : JobTypes::instance())
49  {
50  JobTypeInfo const& jt = x.second;
51 
52  // And create dynamic information for all jobs
 // The emplace is expected to insert a new entry each time (asserted
 // below). The (void) expression presumably keeps `result` "used" when
 // assert() is compiled out.
53  auto const result(m_jobData.emplace(
54  std::piecewise_construct,
57  assert(result.second == true);
58  (void)result.second;
59  }
60  }
61 }
62 
64 {
 // JobQueue destructor body (the trailing symbol index places
 // JobQueue::~JobQueue() on the line just before this brace).
65  // Must unhook before destroying
 // NOTE(review): the statement that performs the unhook is not present in
 // this listing; confirm against the real source.
67 }
68 
// JobQueue::collect() -- publishes the current number of entries in
// m_jobSet to the job_count gauge. The constructor binds this function as
// the collector hook.
// NOTE(review): the signature line and one statement ahead of the
// assignment are absent from this listing (name taken from the trailing
// symbol index).
69 void
71 {
73  job_count = m_jobSet.size();
74 }
75 
// JobQueue::addRefCountedJob -- create a Job and place it in the queue.
//
// type: job type; must not be jtINVALID (asserted).
// name: label stored in the Job and written to the debug log.
// func: callable stored in the Job.
//
// Returns false when m_jobData holds no entry for `type`; otherwise the
// job is inserted into m_jobSet, handed to queueJob(), and true is
// returned.
// NOTE(review): the signature's first line and the declaration of `lock`
// (used in the queueJob call) are absent from this listing.
76 bool
78  JobType type,
79  std::string const& name,
80  JobFunction const& func)
81 {
82  assert(type != jtINVALID);
83 
 // Unknown type: assert in debug builds, fail softly otherwise.
84  auto iter(m_jobData.find(type));
85  assert(iter != m_jobData.end());
86  if (iter == m_jobData.end())
87  return false;
88 
89  JLOG(m_journal.debug())
90  << __func__ << " : Adding job : " << name << " : " << type;
91  JobTypeData& data(iter->second);
92 
93  // FIXME: Workaround incorrect client shutdown ordering
94  // do not add jobs to a queue with no threads
95  assert(type == jtCLIENT || m_workers.getNumberOfThreads() > 0);
96 
97  {
99 
100  // If this goes off it means that a child didn't follow
101  // the Stoppable API rules. A job may only be added if:
102  //
103  // - The JobQueue has NOT stopped
104  // AND
105  // * We are currently processing jobs
106  // OR
107  // * We have pending jobs
108  // OR
109  // * Not all children are stopped
110  //
111  assert(
112  !isStopped() &&
113  (m_processCount > 0 || !m_jobSet.empty() || !areChildrenStopped()));
114 
 // Each job gets the next value of m_lastJob, the type's load monitor,
 // and the queue-wide cancel callback.
115  std::pair<std::set<Job>::iterator, bool> result(m_jobSet.insert(
116  Job(type, name, ++m_lastJob, data.load(), func, m_cancelCallback)));
117  queueJob(*result.first, lock);
118  }
119  return true;
120 }
121 
// JobQueue::getJobCount(JobType t) -- number of jobs of type `t` currently
// waiting. Returns 0 when `t` has no entry in m_jobData. Holds m_mutex for
// the lookup.
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
122 int
124 {
125  std::lock_guard lock(m_mutex);
126 
127  JobDataMap::const_iterator c = m_jobData.find(t);
128 
129  return (c == m_jobData.end()) ? 0 : c->second.waiting;
130 }
131 
// JobQueue::getJobCountTotal(JobType t) -- number of jobs of type `t` that
// are waiting plus those that are running. Returns 0 when `t` has no entry
// in m_jobData. Holds m_mutex for the lookup.
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
132 int
134 {
135  std::lock_guard lock(m_mutex);
136 
137  JobDataMap::const_iterator c = m_jobData.find(t);
138 
139  return (c == m_jobData.end()) ? 0 : (c->second.waiting + c->second.running);
140 }
141 
// JobQueue::getJobCountGE(JobType t) -- sum of the `waiting` counters of
// every m_jobData entry whose key compares >= `t`. Running jobs are not
// included. Holds m_mutex while iterating.
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
142 int
144 {
145  // return the number of jobs at this priority level or greater
146  int ret = 0;
147 
148  std::lock_guard lock(m_mutex);
149 
150  for (auto const& x : m_jobData)
151  {
152  if (x.first >= t)
153  ret += x.second.waiting;
154  }
155 
156  return ret;
157 }
158 
// Decide how many worker threads the queue should use.
//
// c:              requested thread count; 0 requests auto-tuning.
// standaloneMode: when true the count is forced to 1 and `c` is ignored.
//
// Auto-tuning yields 2 + min(hardware_concurrency, 4), i.e. a value in
// [2, 6]; std::thread::hardware_concurrency() may report 0, which gives 2.
// NOTE(review): in this listing the statement that follows the if/else
// chain is absent, so the visible code computes `c` but never applies it.
// The trailing symbol index lists Workers::setNumberOfThreads -- presumably
// that is where `c` is handed to m_workers; confirm against the real source.
159 void
160 JobQueue::setThreadCount(int c, bool const standaloneMode)
161 {
162  if (standaloneMode)
163  {
164  c = 1;
165  }
166  else if (c == 0)
167  {
168  c = static_cast<int>(std::thread::hardware_concurrency());
169  c = 2 + std::min(c, 4); // I/O will bottleneck
170  JLOG(m_journal.info()) << "Auto-tuning to " << c
171  << " validation/transaction/proposal threads.";
172  }
173  else
174  {
 // Explicitly configured value: used as given, only logged here.
175  JLOG(m_journal.info()) << "Configured " << c
176  << " validation/transaction/proposal threads.";
177  }
178 
180 }
181 
184 {
 // Body of JobQueue::makeLoadEvent(JobType t, std::string const& name)
 // (return type and signature lines are absent from this listing; see the
 // trailing symbol index).
 //
 // Returns an empty unique_ptr when `t` has no entry in m_jobData (this
 // also trips the assert in debug builds); otherwise a LoadEvent built
 // from that type's load monitor, `name`, and `true`.
185  JobDataMap::iterator iter(m_jobData.find(t));
186  assert(iter != m_jobData.end());
187 
188  if (iter == m_jobData.end())
189  return {};
190 
191  return std::make_unique<LoadEvent>(iter->second.load(), name, true);
192 }
193 
// JobQueue::addLoadEvents(JobType t, int count,
//                         std::chrono::milliseconds elapsed)
// Feeds `count` samples with total duration `elapsed` to the load monitor
// of job type `t`. Calls LogicError() if the queue has already stopped.
// (The signature line is absent from this listing; name and parameters
// taken from the trailing symbol index.)
194 void
196 {
197  if (isStopped())
198  LogicError("JobQueue::addLoadEvents() called after JobQueue stopped");
199 
200  JobDataMap::iterator iter(m_jobData.find(t));
 // NOTE(review): unlike makeLoadEvent(), there is no runtime check after
 // this assert, so an unknown type dereferences end() when asserts are
 // compiled out.
201  assert(iter != m_jobData.end());
202  iter->second.load().addSamples(count, elapsed);
203 }
204 
// JobQueue::isOverloaded() -- true when the load monitor of at least one
// job type reports isOver(). Every entry of m_jobData is visited (there is
// no early exit), and no lock is taken in the visible body.
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
205 bool
207 {
208  int count = 0;
209 
210  for (auto& x : m_jobData)
211  {
212  if (x.second.load().isOver())
213  ++count;
214  }
215 
216  return count > 0;
217 }
218 
221 {
 // Body of JobQueue::getJson(int c) (return type and signature lines are
 // absent from this listing, as is the declaration of `ret`; see the
 // trailing symbol index).
 //
 // Produces an object with:
 //   "threads"   - m_workers.getNumberOfThreads()
 //   "job_types" - array with one object per job type that has activity
 // The parameter is not referenced by any visible statement.
222  using namespace std::chrono_literals;
224 
225  ret["threads"] = m_workers.getNumberOfThreads();
226 
227  Json::Value priorities = Json::arrayValue;
228 
 // The per-type counters are read under m_mutex.
229  std::lock_guard lock(m_mutex);
230 
231  for (auto& x : m_jobData)
232  {
233  assert(x.first != jtINVALID);
234 
 // jtGENERIC is never reported.
235  if (x.first == jtGENERIC)
236  continue;
237 
238  JobTypeData& data(x.second);
239 
240  LoadMonitor::Stats stats(data.stats());
241 
242  int waiting(data.waiting);
243  int running(data.running);
244 
 // Skip types with nothing to report; within an entry, each key is
 // emitted only when its value is non-zero / true.
245  if ((stats.count != 0) || (waiting != 0) ||
246  (stats.latencyPeak != 0ms) || (running != 0))
247  {
248  Json::Value& pri = priorities.append(Json::objectValue);
249 
250  pri["job_type"] = data.name();
251 
252  if (stats.isOverloaded)
253  pri["over_target"] = true;
254 
255  if (waiting != 0)
256  pri["waiting"] = waiting;
257 
258  if (stats.count != 0)
259  pri["per_second"] = static_cast<int>(stats.count);
260 
 // Latencies are std::chrono::milliseconds; their tick counts are
 // emitted as plain ints.
261  if (stats.latencyPeak != 0ms)
262  pri["peak_time"] = static_cast<int>(stats.latencyPeak.count());
263 
264  if (stats.latencyAvg != 0ms)
265  pri["avg_time"] = static_cast<int>(stats.latencyAvg.count());
266 
267  if (running != 0)
268  pri["in_progress"] = running;
269  }
270  }
271 
272  ret["job_types"] = priorities;
273 
274  return ret;
275 }
276 
// JobQueue::rendezvous() -- blocks on cv_ until no task is being processed
// (m_processCount == 0) and m_jobSet is empty. processTask() notifies cv_
// when that condition becomes true.
// NOTE(review): the signature line and the declaration of `lock` are absent
// from this listing.
277 void
279 {
281  cv_.wait(lock, [&] { return m_processCount == 0 && m_jobSet.empty(); });
282 }
283 
286 {
 // Body of JobQueue::getJobTypeData(JobType type) (return type and
 // signature lines are absent from this listing; see the trailing symbol
 // index).
 //
 // Returns the m_jobData entry for `type`. When there is none, the assert
 // fires in debug builds and m_invalidJobData is returned otherwise.
287  JobDataMap::iterator c(m_jobData.find(type));
288  assert(c != m_jobData.end());
289 
290  // NIKB: This is ugly and I hate it. We must remove jtINVALID completely
291  // and use something sane.
292  if (c == m_jobData.end())
293  return m_invalidJobData;
294 
295  return c->second;
296 }
297 
// JobQueue::onStop() -- Stoppable override invoked when the stop
// notification is issued; intentionally does nothing.
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
298 void
300 {
301  // onStop must be defined and empty here,
302  // otherwise the base class will do the wrong thing.
303 }
304 
// JobQueue::checkStopped(std::lock_guard<std::mutex> const& lock)
// Calls stopped() once every stop condition listed in the body holds, and
// does nothing otherwise. The lock parameter is not referenced in the
// body; presumably it is there so callers must already hold m_mutex.
// (The signature line is absent from this listing; name and parameter
// taken from the trailing symbol index.)
305 void
307 {
308  // We are stopped when all of the following are true:
309  //
310  // 1. A stop notification was received
311  // 2. All Stoppable children have stopped
312  // 3. There are no executing calls to processTask
313  // 4. There are no remaining Jobs in the job set
314  // 5. There are no suspended coroutines
315  //
316  if (isStopping() && areChildrenStopped() && (m_processCount == 0) &&
317  m_jobSet.empty() && nSuspend_ == 0)
318  {
319  stopped();
320  }
321 }
322 
// JobQueue::queueJob(Job const& job, std::lock_guard<std::mutex> const& lock)
// Accounts for a job that has just been inserted into m_jobSet (asserted)
// and decides whether a worker task is requested for it right away.
// (The signature line is absent from this listing; name and parameters
// taken from the trailing symbol index.)
323 void
325 {
326  JobType const type(job.getType());
327  assert(type != jtINVALID);
328  assert(m_jobSet.find(job) != m_jobSet.end());
329  perfLog_.jobQueue(type);
330 
331  JobTypeData& data(getJobTypeData(type));
332 
 // Below the per-type limit: request a worker task now. At or above it:
 // only count the job as deferred; finishJob() requests the task later.
333  if (data.waiting + data.running < getJobLimit(type))
334  {
335  m_workers.addTask();
336  }
337  else
338  {
339  // defer the task until we go below the limit
340  //
341  ++data.deferred;
342  }
 // The job counts as waiting in either case.
343  ++data.waiting;
344 }
345 
// JobQueue::getNextJob(Job& job) -- removes the next runnable job from
// m_jobSet and copies it into `job`.
//
// m_jobSet is scanned from begin(); the first job whose type has fewer
// running jobs than its limit is chosen. That type's `waiting` counter is
// decremented and `running` incremented. The set must not be empty and a
// runnable job must exist (both only asserted).
// NOTE(review): the signature line and the declaration of `iter` are absent
// from this listing.
346 void
348 {
349  assert(!m_jobSet.empty());
350 
352  for (iter = m_jobSet.begin(); iter != m_jobSet.end(); ++iter)
353  {
354  JobTypeData& data(getJobTypeData(iter->getType()));
355 
356  assert(data.running <= getJobLimit(data.type()));
357 
358  // Run this job if we're running below the limit.
359  if (data.running < getJobLimit(data.type()))
360  {
361  assert(data.waiting > 0);
362  break;
363  }
364  }
365 
366  assert(iter != m_jobSet.end());
367 
368  JobType const type = iter->getType();
369  JobTypeData& data(getJobTypeData(type));
370 
371  assert(type != jtINVALID);
372 
 // Copy the job out before erasing its set entry.
373  job = *iter;
374  m_jobSet.erase(iter);
375 
376  --data.waiting;
377  ++data.running;
378 }
379 
// JobQueue::finishJob(JobType type) -- bookkeeping after a job of `type`
// has run: if jobs of this type were deferred by queueJob(), one deferred
// task is released to the workers; then the type's `running` counter is
// decremented.
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
380 void
382 {
383  assert(type != jtINVALID);
384 
385  JobTypeData& data = getJobTypeData(type);
386 
387  // Queue a deferred task if possible
388  if (data.deferred > 0)
389  {
390  assert(data.running + data.waiting >= getJobLimit(type));
391 
392  --data.deferred;
393  m_workers.addTask();
394  }
395 
396  --data.running;
397 }
398 
// JobQueue::processTask(int instance) -- override that performs one task:
// takes the next job from the queue, runs it, reports timings, then
// updates the counters and re-evaluates the stop condition.
//
// instance: passed through to perfLog_.jobStart() / jobFinish().
// (The signature line is absent from this listing; name and parameter
// taken from the trailing symbol index.)
399 void
401 {
402  JobType type;
403 
404  {
405  using namespace std::chrono;
406  Job::clock_type::time_point const start_time(Job::clock_type::now());
407  {
 // `job` lives only inside this scope so that it is destroyed before
 // the finishJob()/checkStopped() block further down.
408  Job job;
409  {
 // Dequeue under the lock; the job itself runs without it.
410  std::lock_guard lock(m_mutex);
411  getNextJob(job);
412  ++m_processCount;
413  }
414  type = job.getType();
415  JobTypeData& data(getJobTypeData(type));
416  JLOG(m_journal.trace()) << "Doing " << data.name() << "job";
417 
418  // The amount of time that the job was in the queue
419  auto const q_time =
420  date::ceil<microseconds>(start_time - job.queue_time());
421  perfLog_.jobStart(type, q_time, start_time, instance);
422 
423  job.doJob();
424 
425  // The amount of time it took to execute the job
426  auto const x_time =
427  date::ceil<microseconds>(Job::clock_type::now() - start_time);
428 
 // Queue and execution times are sent to the insight events only when
 // at least one of them reaches 10ms.
429  if (x_time >= 10ms || q_time >= 10ms)
430  {
431  getJobTypeData(type).dequeue.notify(q_time);
432  getJobTypeData(type).execute.notify(x_time);
433  }
434  perfLog_.jobFinish(type, x_time, instance);
435  }
436  }
437 
438  {
439  std::lock_guard lock(m_mutex);
440  // Job should be destroyed before calling checkStopped
441  // otherwise destructors with side effects can access
442  // parent objects that are already destroyed.
443  finishJob(type);
 // Wake rendezvous() waiters once nothing is running or queued.
444  if (--m_processCount == 0 && m_jobSet.empty())
445  cv_.notify_all();
446  checkStopped(lock);
447  }
448 
449  // Note that when Job::~Job is called, the last reference
450  // to the associated LoadEvent object (in the Job) may be destroyed.
451 }
452 
// JobQueue::getJobLimit(JobType type) -- returns limit() of the static
// JobTypeInfo registered for `type` in JobTypes. The looked-up info must
// not be the jtINVALID entry (asserted only).
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
453 int
455 {
456  JobTypeInfo const& j(JobTypes::instance().get(type));
457  assert(j.type() != jtINVALID);
458 
459  return j.limit();
460 }
461 
// JobQueue::onChildrenStopped() -- Stoppable override invoked when all
// children have stopped; re-evaluates the stop condition under m_mutex.
// (The signature line is absent from this listing; name taken from the
// trailing symbol index.)
462 void
464 {
465  std::lock_guard lock(m_mutex);
466  checkStopped(lock);
467 }
468 
469 } // namespace ripple
ripple::JobQueue::finishJob
void finishJob(JobType type)
Definition: JobQueue.cpp:381
ripple::JobQueue::m_jobSet
std::set< Job > m_jobSet
Definition: JobQueue.h:239
ripple::JobQueue::nSuspend_
int nSuspend_
Definition: JobQueue.h:247
std::bind
T bind(T... args)
std::string
STL class.
std::shared_ptr< Collector >
ripple::jtCLIENT
@ jtCLIENT
Definition: Job.h:48
ripple::Logs
Manages partitions for logging.
Definition: Log.h:48
ripple::Stoppable::stopped
void stopped()
Called by derived classes to indicate that the stoppable has stopped.
Definition: Stoppable.cpp:72
beast::Journal::trace
Stream trace() const
Severity stream access functions.
Definition: Journal.h:309
ripple::JobTypes
Definition: JobTypes.h:32
Json::arrayValue
@ arrayValue
array value (ordered list)
Definition: json_value.h:42
std::pair
ripple::JobTypeData::execute
beast::insight::Event execute
Definition: JobTypeData.h:52
std::map::find
T find(T... args)
ripple::Workers::getNumberOfThreads
int getNumberOfThreads() const noexcept
Retrieve the desired number of threads.
Definition: Workers.cpp:53
ripple::JobQueue::JobQueue
JobQueue(beast::insight::Collector::ptr const &collector, Stoppable &parent, beast::Journal journal, Logs &logs, perf::PerfLog &perfLog)
Definition: JobQueue.cpp:26
ripple::JobQueue::getJobLimit
int getJobLimit(JobType type)
Definition: JobQueue.cpp:454
std::chrono::milliseconds
ripple::JobQueue::getJson
Json::Value getJson(int c=0)
Definition: JobQueue.cpp:220
ripple::LoadMonitor::Stats::count
std::uint64_t count
Definition: LoadMonitor.h:60
ripple::LoadMonitor::Stats::isOverloaded
bool isOverloaded
Definition: LoadMonitor.h:63
ripple::JobQueue::checkStopped
void checkStopped(std::lock_guard< std::mutex > const &lock)
Definition: JobQueue.cpp:306
std::map::emplace
T emplace(T... args)
ripple::Workers::setNumberOfThreads
void setNumberOfThreads(int numberOfThreads)
Set the desired number of threads.
Definition: Workers.cpp:63
ripple::Job::queue_time
clock_type::time_point const & queue_time() const
Returns the time when the job was queued.
Definition: Job.cpp:65
std::lock_guard
STL class.
ripple::perf::PerfLog
Singleton class that maintains performance counters and optionally writes Json-formatted data to a di...
Definition: PerfLog.h:46
ripple::JobQueue::addRefCountedJob
bool addRefCountedJob(JobType type, std::string const &name, JobFunction const &func)
Definition: JobQueue.cpp:77
std::function
ripple::JobQueue::m_journal
beast::Journal m_journal
Definition: JobQueue.h:236
ripple::JobTypeInfo::limit
int limit() const
Definition: JobTypeInfo.h:77
ripple::JobQueue::onStop
void onStop() override
Override called when the stop notification is issued.
Definition: JobQueue.cpp:299
ripple::JobQueue::getJobCount
int getJobCount(JobType t) const
Jobs waiting at this priority.
Definition: JobQueue.cpp:123
ripple::JobQueue::m_invalidJobData
JobTypeData m_invalidJobData
Definition: JobQueue.h:241
ripple::JobTypeData::dequeue
beast::insight::Event dequeue
Definition: JobTypeData.h:51
ripple::JobQueue::getJobCountTotal
int getJobCountTotal(JobType t) const
Jobs waiting plus running at this priority.
Definition: JobQueue.cpp:133
ripple::Stoppable::isStopped
bool isStopped() const
Returns true if the requested stop has completed.
Definition: Stoppable.cpp:60
Json::Value::append
Value & append(const Value &value)
Append value to array at the end.
Definition: json_value.cpp:882
ripple::LoadMonitor::Stats
Definition: LoadMonitor.h:56
ripple::Stoppable
Provides an interface for starting and stopping.
Definition: Stoppable.h:201
Json::objectValue
@ objectValue
object value (collection of name/value pairs).
Definition: json_value.h:43
ripple::JobQueue::~JobQueue
~JobQueue()
Definition: JobQueue.cpp:63
ripple::JobQueue::hook
beast::insight::Hook hook
Definition: JobQueue.h:256
std::thread::hardware_concurrency
T hardware_concurrency(T... args)
ripple::JobQueue::m_mutex
std::mutex m_mutex
Definition: JobQueue.h:237
ripple::JobTypeInfo::type
JobType type() const
Definition: JobTypeInfo.h:65
ripple::JobQueue::m_collector
beast::insight::Collector::ptr m_collector
Definition: JobQueue.h:254
ripple::JobQueue::isOverloaded
bool isOverloaded()
Definition: JobQueue.cpp:206
std::unique_lock
STL class.
ripple::JobTypeInfo
Holds all the 'static' information about a job, which does not change.
Definition: JobTypeInfo.h:28
ripple::jtGENERIC
@ jtGENERIC
Definition: Job.h:74
ripple::Stoppable::areChildrenStopped
bool areChildrenStopped() const
Returns true if all children have stopped.
Definition: Stoppable.cpp:66
ripple::jtINVALID
@ jtINVALID
Definition: Job.h:35
ripple::JobTypes::instance
static JobTypes const & instance()
Definition: JobTypes.h:101
ripple::JobQueue::processTask
void processTask(int instance) override
Perform a task.
Definition: JobQueue.cpp:400
ripple::JobQueue::onChildrenStopped
void onChildrenStopped() override
Override called when all children have stopped.
Definition: JobQueue.cpp:463
ripple::JobQueue::m_jobData
JobDataMap m_jobData
Definition: JobQueue.h:240
beast::Journal::info
Stream info() const
Definition: Journal.h:321
ripple::JobTypeData
Definition: JobTypeData.h:29
ripple::Job
Definition: Job.h:82
beast::Journal
A generic endpoint for log messages.
Definition: Journal.h:58
std::condition_variable::wait
T wait(T... args)
ripple::JobQueue::getJobTypeData
JobTypeData & getJobTypeData(JobType type)
Definition: JobQueue.cpp:285
ripple::JobQueue::m_lastJob
std::uint64_t m_lastJob
Definition: JobQueue.h:238
ripple::perf::PerfLog::jobQueue
virtual void jobQueue(JobType const type)=0
Log queued job.
ripple::perf::PerfLog::jobStart
virtual void jobStart(JobType const type, microseconds dur, steady_time_point startTime, int instance)=0
Log job executing.
std::forward_as_tuple
T forward_as_tuple(T... args)
ripple::JobQueue::rendezvous
void rendezvous()
Block until no tasks running.
Definition: JobQueue.cpp:278
ripple::JobQueue::m_cancelCallback
Job::CancelCallback m_cancelCallback
Definition: JobQueue.h:250
std::min
T min(T... args)
ripple::JobQueue::getJobCountGE
int getJobCountGE(JobType t) const
All waiting jobs at or greater than this priority.
Definition: JobQueue.cpp:143
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
ripple::JobQueue::queueJob
void queueJob(Job const &job, std::lock_guard< std::mutex > const &lock)
Definition: JobQueue.cpp:324
ripple::JobQueue::perfLog_
perf::PerfLog & perfLog_
Definition: JobQueue.h:253
std
STL namespace.
ripple::LogicError
void LogicError(std::string const &how) noexcept
Called when faulty logic causes a broken invariant.
Definition: contract.cpp:48
ripple::JobQueue::setThreadCount
void setThreadCount(int c, bool const standaloneMode)
Set the number of threads serving the job queue to precisely this number.
Definition: JobQueue.cpp:160
std::chrono::milliseconds::count
T count(T... args)
ripple::Workers::addTask
void addTask()
Add a task to be performed.
Definition: Workers.cpp:126
ripple::LoadMonitor::Stats::latencyAvg
std::chrono::milliseconds latencyAvg
Definition: LoadMonitor.h:61
beast::Journal::debug
Stream debug() const
Definition: Journal.h:315
ripple::JobType
JobType
Definition: Job.h:33
ripple::JobQueue::job_count
beast::insight::Gauge job_count
Definition: JobQueue.h:255
std::map::end
T end(T... args)
ripple::Job::getType
JobType getType() const
Definition: Job.cpp:52
ripple::JobQueue::m_processCount
int m_processCount
Definition: JobQueue.h:244
ripple::JobQueue::cv_
std::condition_variable cv_
Definition: JobQueue.h:258
ripple::JobQueue::makeLoadEvent
std::unique_ptr< LoadEvent > makeLoadEvent(JobType t, std::string const &name)
Return a scoped LoadEvent.
Definition: JobQueue.cpp:183
ripple::JobQueue::m_workers
Workers m_workers
Definition: JobQueue.h:249
ripple::JobQueue::addLoadEvents
void addLoadEvents(JobType t, int count, std::chrono::milliseconds elapsed)
Add multiple load events.
Definition: JobQueue.cpp:195
ripple::Job::doJob
void doJob()
Definition: Job.cpp:79
std::unique_ptr
STL class.
ripple::LoadMonitor::Stats::latencyPeak
std::chrono::milliseconds latencyPeak
Definition: LoadMonitor.h:62
beast::insight::Hook
A reference to a handler for performing polled collection.
Definition: Hook.h:31
std::condition_variable::notify_all
T notify_all(T... args)
std::set
STL class.
ripple::JobQueue::getNextJob
void getNextJob(Job &job)
Definition: JobQueue.cpp:347
ripple::perf::PerfLog::jobFinish
virtual void jobFinish(JobType const type, microseconds dur, int instance)=0
Log job finishing.
Json::Value
Represents a JSON value.
Definition: json_value.h:145
beast::insight::Event::notify
void notify(std::chrono::duration< Rep, Period > const &value) const
Push an event notification.
Definition: Event.h:66
ripple::get
T & get(EitherAmount &amt)
Definition: AmountSpec.h:116
ripple::JobQueue::collect
void collect()
Definition: JobQueue.cpp:70
ripple::Stoppable::isStopping
bool isStopping() const
Returns true if the stoppable should stop.
Definition: Stoppable.cpp:54
std::chrono
std::chrono::steady_clock::now
T now(T... args)