rippled
ReportingETL.cpp
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2020 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #include <ripple/app/reporting/DBHelpers.h>
21 #include <ripple/app/reporting/ReportingETL.h>
22 
23 #include <ripple/beast/core/CurrentThreadName.h>
24 #include <ripple/json/json_reader.h>
25 #include <ripple/json/json_writer.h>
26 #include <boost/asio/connect.hpp>
27 #include <boost/asio/ip/tcp.hpp>
28 #include <boost/beast/core.hpp>
29 #include <boost/beast/websocket.hpp>
30 #include <cstdlib>
31 #include <iostream>
32 #include <string>
33 #include <variant>
34 
35 namespace ripple {
36 
37 namespace detail {
// Formats the identifying fields of a LedgerInfo as a single line of text for
// log messages: the sequence is streamed as-is and each hash is hex-encoded
// with strHex.
// NOTE(review): the return-type line and the declaration of `ss` are not
// visible in this listing; `ss` is presumably a std::stringstream - confirm.
40 toString(LedgerInfo const& info)
41 {
43  ss << "LedgerInfo { Sequence : " << info.seq
44  << " Hash : " << strHex(info.hash) << " TxHash : " << strHex(info.txHash)
45  << " AccountHash : " << strHex(info.accountHash)
46  << " ParentHash : " << strHex(info.parentHash) << " }";
47  return ss.str();
48 }
49 } // namespace detail
50 
// Drains writeQueue into `ledger`: each popped object is inserted into the
// ledger unless an object with the same key already exists. The loop ends when
// stopping_ is set or when pop() yields a null pointer.
// NOTE(review): the signature lines and the declaration of `sle` are not
// visible in this listing.
51 void
55 {
// counts objects consumed so far; used only to decide when to flush
57  size_t num = 0;
58  while (!stopping_ && (sle = writeQueue.pop()))
59  {
60  assert(sle);
61  if (!ledger->exists(sle->key()))
62  ledger->rawInsert(sle);
63 
// Periodically flush dirty state-map nodes during the download. A
// flushInterval_ of 0 disables these intermediate flushes. Because num
// starts at 0, the very first object also triggers a flush.
64  if (flushInterval_ != 0 && (num % flushInterval_) == 0)
65  {
66  JLOG(journal_.debug()) << "Flushing! key = " << strHex(sle->key());
67  ledger->stateMap().flushDirty(hotACCOUNT_NODE);
68  }
69  ++num;
70  }
71 }
72 
// Inserts every transaction contained in `data` into `ledger` and returns
// accountTxData, which receives one entry per transaction.
// NOTE(review): the start of the signature and the declaration of
// accountTxData are not visible in this listing.
76  org::xrpl::rpc::v1::GetLedgerResponse& data)
77 {
79  for (auto& txn : data.transactions_list().transactions())
80  {
81  auto& raw = txn.transaction_blob();
82 
// deserialize the raw transaction blob
83  SerialIter it{raw.data(), raw.size()};
84  STTx sttx{it};
85 
86  auto txSerializer = std::make_shared<Serializer>(sttx.getSerializer());
87 
// build the metadata object from the metadata blob, tagged with the
// transaction ID and this ledger's sequence
88  TxMeta txMeta{
89  sttx.getTransactionID(), ledger->info().seq, txn.metadata_blob()};
90 
91  auto metaSerializer =
92  std::make_shared<Serializer>(txMeta.getAsObject().getSerializer());
93 
94  JLOG(journal_.trace())
95  << __func__ << " : "
96  << "Inserting transaction = " << sttx.getTransactionID();
// insert transaction + metadata into the ledger; the returned hash is
// recorded alongside the metadata in accountTxData
97  uint256 nodestoreHash = ledger->rawTxInsertWithHash(
98  sttx.getTransactionID(), txSerializer, metaSerializer);
99  accountTxData.emplace_back(txMeta, std::move(nodestoreHash), journal_);
100  }
101  return accountTxData;
102 }
103 
// Downloads the ledger with sequence `startingSequence` in full and stores it.
// Returns the populated ledger. Returns an empty pointer if a ledger was
// already found (the database is expected to be empty) or if the header fetch
// yields nothing.
// NOTE(review): several lines of this function are not visible in this
// listing (return type, the lookup that initialises `ledger`, and the
// declarations of ledgerData, accountTxData, writeQueue and `null`).
105 ReportingETL::loadInitialLedger(uint32_t startingSequence)
106 {
107  // check that database is actually empty
108  auto ledger = std::const_pointer_cast<Ledger>(
110  if (ledger)
111  {
112  JLOG(journal_.fatal()) << __func__ << " : "
113  << "Database is not empty";
114  assert(false);
115  return {};
116  }
117 
118  // fetch the ledger from the network. This function will not return until
119  // either the fetch is successful, or the server is being shutdown. This
120  // only fetches the ledger header and the transactions+metadata
122  fetchLedgerData(startingSequence)};
123  if (!ledgerData)
124  return {};
125 
126  LedgerInfo lgrInfo =
127  deserializeHeader(makeSlice(ledgerData->ledger_header()), true);
128 
129  JLOG(journal_.debug()) << __func__ << " : "
130  << "Deserialized ledger header. "
131  << detail::toString(lgrInfo);
132 
133  ledger =
134  std::make_shared<Ledger>(lgrInfo, app_.config(), app_.getNodeFamily());
135  ledger->stateMap().clearSynching();
136  ledger->txMap().clearSynching();
137 
138 #ifdef RIPPLED_REPORTING
140  insertTransactions(ledger, *ledgerData);
141 #endif
142 
143  auto start = std::chrono::system_clock::now();
144 
// Writer thread: consumes objects from writeQueue and inserts them into
// `ledger`. It captures locals by reference, so it must be joined before this
// function returns.
// NOTE(review): if the loadBalancer_ call below throws, the still-joinable
// std::thread is destroyed during unwinding, which terminates the process -
// confirm that call cannot throw.
146  std::thread asyncWriter{[this, &ledger, &writeQueue]() {
147  consumeLedgerData(ledger, writeQueue);
148  }};
149 
150  // download the full account state map. This function downloads full ledger
151  // data and pushes the downloaded data into the writeQueue. asyncWriter
152  // consumes from the queue and inserts the data into the Ledger object.
153  // Once the below call returns, all data has been pushed into the queue
154  loadBalancer_.loadInitialLedger(startingSequence, writeQueue);
155 
156  // null is used to represent the end of the queue
158  writeQueue.push(null);
159  // wait for the writer to finish
160  asyncWriter.join();
161 
// skip the final flush and database write when the server is shutting down
162  if (!stopping_)
163  {
164  flushLedger(ledger);
165  if (app_.config().reporting())
166  {
167 #ifdef RIPPLED_REPORTING
168  writeToPostgres(
169  ledger->info(), accountTxData, app_.getPgPool(), journal_);
170 #endif
171  }
172  }
173  auto end = std::chrono::system_clock::now();
// NOTE(review): dividing count() by 1e9 assumes system_clock ticks are
// nanoseconds; the tick period is implementation-defined.
174  JLOG(journal_.debug()) << "Time to download and store ledger = "
175  << ((end - start).count()) / 1000000000.0;
176  return ledger;
177 }
178 
// Marks `ledger` immutable, flushes the dirty nodes of its state map and tx
// map, stores the serialized ledger header, syncs the node store, and then
// checks the map hashes and ledger hash against the values read from
// ledger->info(). Throws std::runtime_error on any hash mismatch.
// NOTE(review): the signature line and two lines of the header-store
// statement are not visible in this listing.
179 void
181 {
182  JLOG(journal_.debug()) << __func__ << " : "
183  << "Flushing ledger. "
184  << detail::toString(ledger->info());
// NOTE(review): these bind with auto&, i.e. they are references rather than
// copies. If info() returns a reference to the ledger's live fields and
// setImmutable rewrites those fields, the "expected" values below would track
// the rewritten ones and the checks would compare values with themselves -
// confirm against Ledger::info() / Ledger::setImmutable.
185  // These are recomputed in setImmutable
186  auto& accountHash = ledger->info().accountHash;
187  auto& txHash = ledger->info().txHash;
188  auto& ledgerHash = ledger->info().hash;
189 
190  ledger->setImmutable(app_.config(), false);
191  auto start = std::chrono::system_clock::now();
192 
193  auto numFlushed = ledger->stateMap().flushDirty(hotACCOUNT_NODE);
194 
195  auto numTxFlushed = ledger->txMap().flushDirty(hotTRANSACTION_NODE);
196 
// serialize the ledger header and store it under the ledger hash
197  {
198  Serializer s(128);
200  addRaw(ledger->info(), s);
202  hotLEDGER,
203  std::move(s.modData()),
204  ledger->info().hash,
205  ledger->info().seq);
206  }
207 
208  app_.getNodeStore().sync();
209 
210  auto end = std::chrono::system_clock::now();
211 
212  JLOG(journal_.debug()) << __func__ << " : "
213  << "Flushed " << numFlushed
214  << " nodes to nodestore from stateMap";
215  JLOG(journal_.debug()) << __func__ << " : "
216  << "Flushed " << numTxFlushed
217  << " nodes to nodestore from txMap";
218 
// NOTE(review): count() / 1e9 assumes system_clock ticks are nanoseconds;
// the tick period is implementation-defined.
219  JLOG(journal_.debug()) << __func__ << " : "
220  << "Flush took "
221  << (end - start).count() / 1000000000.0
222  << " seconds";
223 
// Flushing no state nodes is treated as a fatal condition (assert), whereas
// an empty tx map flush only warns.
224  if (numFlushed == 0)
225  {
226  JLOG(journal_.fatal()) << __func__ << " : "
227  << "Flushed 0 nodes from state map";
228  assert(false);
229  }
230  if (numTxFlushed == 0)
231  {
232  JLOG(journal_.warn()) << __func__ << " : "
233  << "Flushed 0 nodes from tx map";
234  }
235 
// NOTE(review): in the three mismatch messages below, the "Actual hash = "
// literal has no leading separator, so it is printed directly after the
// expected hash's hex digits.
236  // Make sure calculated hashes are correct
237  if (ledger->stateMap().getHash().as_uint256() != accountHash)
238  {
239  JLOG(journal_.fatal())
240  << __func__ << " : "
241  << "State map hash does not match. "
242  << "Expected hash = " << strHex(accountHash) << "Actual hash = "
243  << strHex(ledger->stateMap().getHash().as_uint256());
244  Throw<std::runtime_error>("state map hash mismatch");
245  }
246 
247  if (ledger->txMap().getHash().as_uint256() != txHash)
248  {
249  JLOG(journal_.fatal())
250  << __func__ << " : "
251  << "Tx map hash does not match. "
252  << "Expected hash = " << strHex(txHash) << "Actual hash = "
253  << strHex(ledger->txMap().getHash().as_uint256());
254  Throw<std::runtime_error>("tx map hash mismatch");
255  }
256 
257  if (ledger->info().hash != ledgerHash)
258  {
259  JLOG(journal_.fatal())
260  << __func__ << " : "
261  << "Ledger hash does not match. "
262  << "Expected hash = " << strHex(ledgerHash)
263  << "Actual hash = " << strHex(ledger->info().hash);
264  Throw<std::runtime_error>("ledger hash mismatch");
265  }
266 
267  JLOG(journal_.info()) << __func__ << " : "
268  << "Successfully flushed ledger! "
269  << detail::toString(ledger->info());
270 }
271 
// Publishes an already-loaded ledger through NetworkOPs::pubLedger and then
// calls setLastPublish().
// NOTE(review): the signature line is not visible in this listing; callers
// invoke this as publishLedger(ledger).
272 void
274 {
275  app_.getOPs().pubLedger(ledger);
276 
277  setLastPublish();
278 }
279 
// Looks up the ledger with sequence `ledgerSequence` via LedgerMaster and, once
// found, posts a task to publishStrand_ that publishes it. Returns true when
// the publish task has been posted. Returns false when the ledger is still
// missing after `maxAttempts` retries, or when stopping_ is set.
280 bool
281 ReportingETL::publishLedger(uint32_t ledgerSequence, uint32_t maxAttempts)
282 {
283  JLOG(journal_.info()) << __func__ << " : "
284  << "Attempting to publish ledger = "
285  << ledgerSequence;
286  size_t numAttempts = 0;
287  while (!stopping_)
288  {
289  auto ledger = app_.getLedgerMaster().getLedgerBySeq(ledgerSequence);
290 
291  if (!ledger)
292  {
293  JLOG(journal_.warn())
294  << __func__ << " : "
295  << "Trying to publish. Could not find ledger with sequence = "
296  << ledgerSequence;
297  // We try maxAttempts times to publish the ledger, waiting one
298  // second in between each attempt.
299  // If the ledger is not present in the database after maxAttempts,
300  // we attempt to take over as the writer. If the takeover fails,
301  // doContinuousETL will return, and this node will go back to
302  // publishing.
303  // If the node is in strict read only mode, we simply
304  // skip publishing this ledger and return false indicating the
305  // publish failed
306  if (numAttempts >= maxAttempts)
307  {
308  JLOG(journal_.error()) << __func__ << " : "
309  << "Failed to publish ledger after "
310  << numAttempts << " attempts.";
// Both branches return false; they differ only in the message
// logged. The takeover itself is left to the caller.
311  if (!readOnly_)
312  {
313  JLOG(journal_.info()) << __func__ << " : "
314  << "Attempting to become ETL writer";
315  return false;
316  }
317  else
318  {
319  JLOG(journal_.debug())
320  << __func__ << " : "
321  << "In strict read-only mode. "
322  << "Skipping publishing this ledger. "
323  << "Beginning fast forward.";
324  return false;
325  }
326  }
327  else
328  {
// NOTE(review): the one-second wait described in the comment above
// is not visible in this listing (a line is missing here) - confirm.
330  ++numAttempts;
331  }
332  continue;
333  }
334 
// Publishing happens on publishStrand_, not on the calling thread; the
// lambda holds its own copy of the ledger pointer.
// NOTE(review): __func__ inside the lambda names the lambda's call
// operator, not publishLedger, so this log line is labelled differently
// from the others in this function.
335  publishStrand_.post([this, ledger]() {
336  app_.getOPs().pubLedger(ledger);
337  setLastPublish();
338  JLOG(journal_.info())
339  << __func__ << " : "
340  << "Published ledger. " << detail::toString(ledger->info());
341  });
342  return true;
343  }
344  return false;
345 }
346 
// Fetches the ledger with sequence `idx` through the load balancer, passing
// false for fetchLedger's getObjects flag, and returns the response.
// NOTE(review): the signature and the declaration of `response` are not
// visible in this listing.
349 {
350  JLOG(journal_.debug()) << __func__ << " : "
351  << "Attempting to fetch ledger with sequence = "
352  << idx;
353 
355  loadBalancer_.fetchLedger(idx, false);
// NOTE(review): `response` is dereferenced here without checking that it
// holds a value; ETLLoadBalancer::fetchLedger returns a std::optional -
// confirm it cannot be empty when trace logging is active.
356  JLOG(journal_.trace()) << __func__ << " : "
357  << "GetLedger reply = " << response->DebugString();
358  return response;
359 }
360 
// Fetches the ledger with sequence `idx` through the load balancer, passing
// true for fetchLedger's getObjects flag, and returns the response.
// NOTE(review): the signature and the declaration of `response` are not
// visible in this listing; presumably this is fetchLedgerDataAndDiff - confirm.
363 {
364  JLOG(journal_.debug()) << __func__ << " : "
365  << "Attempting to fetch ledger with sequence = "
366  << idx;
367 
369  loadBalancer_.fetchLedger(idx, true);
// NOTE(review): `response` is dereferenced here without checking that it
// holds a value; ETLLoadBalancer::fetchLedger returns a std::optional -
// confirm it cannot be empty when trace logging is active.
370  JLOG(journal_.trace()) << __func__ << " : "
371  << "GetLedger reply = " << response->DebugString();
372  return response;
373 }
374 
// Turns `next` into the ledger described by `rawData`: sets the header,
// inserts the transactions, then applies each ledger object as an erase,
// replace or insert. Returns `next` together with the account-transaction
// data produced by insertTransactions.
// NOTE(review): the start of the signature and the declaration of
// accountTxData are not visible in this listing.
378  org::xrpl::rpc::v1::GetLedgerResponse& rawData)
379 {
380  JLOG(journal_.info()) << __func__ << " : "
381  << "Beginning ledger update";
382 
383  LedgerInfo lgrInfo =
384  deserializeHeader(makeSlice(rawData.ledger_header()), true);
385 
386  JLOG(journal_.debug()) << __func__ << " : "
387  << "Deserialized ledger header. "
388  << detail::toString(lgrInfo);
389 
390  next->setLedgerInfo(lgrInfo);
391 
392  next->stateMap().clearSynching();
393  next->txMap().clearSynching();
394 
396  insertTransactions(next, rawData)};
397 
398  JLOG(journal_.debug())
399  << __func__ << " : "
400  << "Inserted all transactions. Number of transactions = "
401  << rawData.transactions_list().transactions_size();
402 
403  for (auto& obj : rawData.ledger_objects().objects())
404  {
// NOTE(review): fromVoid is handed the raw key bytes with no length
// check visible here - presumably keys are always 32 bytes; confirm.
405  auto key = uint256::fromVoid(obj.key().data());
406  auto& data = obj.data();
407 
408  // indicates object was deleted
409  if (data.size() == 0)
410  {
411  JLOG(journal_.trace()) << __func__ << " : "
412  << "Erasing object = " << key;
413  if (next->exists(key))
414  next->rawErase(key);
415  }
416  else
417  {
// non-empty data: deserialize the object and replace or insert it
// depending on whether the key already exists in the ledger
418  SerialIter it{data.data(), data.size()};
419  std::shared_ptr<SLE> sle = std::make_shared<SLE>(it, key);
420 
421  if (next->exists(key))
422  {
423  JLOG(journal_.trace()) << __func__ << " : "
424  << "Replacing object = " << key;
425  next->rawReplace(sle);
426  }
427  else
428  {
429  JLOG(journal_.trace()) << __func__ << " : "
430  << "Inserting object = " << key;
431  next->rawInsert(sle);
432  }
433  }
434  }
435  JLOG(journal_.debug())
436  << __func__ << " : "
437  << "Inserted/modified/deleted all objects. Number of objects = "
438  << rawData.ledger_objects().objects_size();
439 
// when the response does not include the skiplist, update it locally
440  if (!rawData.skiplist_included())
441  {
442  next->updateSkipList();
443  JLOG(journal_.warn())
444  << __func__ << " : "
445  << "tx process is not sending skiplist. This indicates that the tx "
446  "process is parsing metadata instead of doing a SHAMap diff. "
447  "Make sure tx process is running the same code as reporting to "
448  "use SHAMap diff instead of parsing metadata";
449  }
450 
451  JLOG(journal_.debug()) << __func__ << " : "
452  << "Finished ledger update. "
453  << detail::toString(next->info());
454  return {std::move(next), std::move(accountTxData)};
455 }
456 
457 // Database must be populated when this starts
// Runs the extract/transform/load pipeline starting at `startSequence` and
// returns the sequence of the last ledger published by the load thread (empty
// if none was published). Throws std::runtime_error if the parent ledger
// (startSequence - 1) cannot be found. writing_ is true for the duration.
// NOTE(review): several lines are not visible in this listing (return type,
// queue declarations, the extract loop's wait call, and the declarations of
// fetchResponse and result).
459 ReportingETL::runETLPipeline(uint32_t startSequence)
460 {
461  /*
462  * Behold, mortals! This function spawns three separate threads, which talk
463  * to each other via 2 different thread safe queues and 1 atomic variable.
464  * All threads and queues are function local. This function returns when all
465  * of the threads exit. There are two termination conditions: the first is
466  * if the load thread encounters a write conflict. In this case, the load
467  * thread sets writeConflict, an atomic bool, to true, which signals the
468  * other threads to stop. The second termination condition is when the
469  * entire server is shutting down, which is detected in one of three ways:
470  * 1. isStopping() returns true if the server is shutting down
471  * 2. networkValidatedLedgers_.waitUntilValidatedByNetwork returns
472  * false, signaling the wait was aborted.
473  * 3. fetchLedgerDataAndDiff returns an empty optional, signaling the fetch
474  * was aborted.
475  * In all cases, the extract thread detects this condition,
476  * and pushes an empty optional onto the transform queue. The transform
477  * thread, upon popping an empty optional, pushes an empty optional onto the
478  * load queue, and then returns. The load thread, upon popping an empty
479  * optional, returns.
480  */
481 
482  JLOG(journal_.debug()) << __func__ << " : "
483  << "Starting etl pipeline";
484  writing_ = true;
485 
486  std::shared_ptr<Ledger> parent = std::const_pointer_cast<Ledger>(
487  app_.getLedgerMaster().getLedgerBySeq(startSequence - 1));
488  if (!parent)
489  {
490  assert(false);
491  Throw<std::runtime_error>("runETLPipeline: parent ledger is null");
492  }
493 
494  std::atomic_bool writeConflict = false;
495  std::optional<uint32_t> lastPublishedSequence;
496  constexpr uint32_t maxQueueSize = 1000;
497 
499  transformQueue{maxQueueSize};
500 
// Extract thread: fetches ledgers in increasing sequence order and pushes
// each response onto transformQueue.
501  std::thread extracter{[this,
502  &startSequence,
503  &writeConflict,
504  &transformQueue]() {
505  beast::setCurrentThreadName("rippled: ReportingETL extract");
506  uint32_t currentSequence = startSequence;
507 
508  // there are two stopping conditions here.
509  // First, if there is a write conflict in the load thread, the ETL
510  // mechanism should stop.
511  // The other stopping condition is if the entire server is shutting
512  // down. This can be detected in a variety of ways. See the comment
513  // at the top of the function
515  currentSequence) &&
516  !writeConflict && !isStopping())
517  {
518  auto start = std::chrono::system_clock::now();
520  fetchLedgerDataAndDiff(currentSequence)};
521  auto end = std::chrono::system_clock::now();
522 
523  auto time = ((end - start).count()) / 1000000000.0;
// NOTE(review): fetchResponse is dereferenced here, before the
// emptiness check further down. If the fetch was aborted and
// returned an empty optional, this dereference happens first -
// confirm and consider moving the check above this line.
524  auto tps =
525  fetchResponse->transactions_list().transactions_size() / time;
526 
527  JLOG(journal_.debug()) << "Extract phase time = " << time
528  << " . Extract phase tps = " << tps;
529  // if the fetch is unsuccessful, stop. fetchLedger only returns
530  // false if the server is shutting down, or if the ledger was
531  // found in the database (which means another process already
532  // wrote the ledger that this process was trying to extract;
533  // this is a form of a write conflict). Otherwise,
534  // fetchLedgerDataAndDiff will keep trying to fetch the
535  // specified ledger until successful
536  if (!fetchResponse)
537  {
538  break;
539  }
540 
541  transformQueue.push(std::move(fetchResponse));
542  ++currentSequence;
543  }
544  // empty optional tells the transformer to shut down
545  transformQueue.push({});
546  }};
547 
551  loadQueue{maxQueueSize};
// Transform thread: builds each next ledger on top of `parent` and hands
// the result to the load thread through loadQueue.
552  std::thread transformer{[this,
553  &parent,
554  &writeConflict,
555  &loadQueue,
556  &transformQueue]() {
557  beast::setCurrentThreadName("rippled: ReportingETL transform");
558 
559  assert(parent);
560  parent = std::make_shared<Ledger>(*parent, NetClock::time_point{});
561  while (!writeConflict)
562  {
564  transformQueue.pop()};
565  // if fetchResponse is an empty optional, the extracter thread has
566  // stopped and the transformer should stop as well
567  if (!fetchResponse)
568  {
569  break;
570  }
// while stopping, keep popping (and discarding) entries until the
// empty optional arrives
571  if (isStopping())
572  continue;
573 
574  auto start = std::chrono::system_clock::now();
575  auto [next, accountTxData] =
576  buildNextLedger(parent, *fetchResponse);
577  auto end = std::chrono::system_clock::now();
578 
579  auto duration = ((end - start).count()) / 1000000000.0;
580  JLOG(journal_.debug()) << "transform time = " << duration;
581  // The below line needs to execute before pushing to the queue, in
582  // order to prevent this thread and the loader thread from accessing
583  // the same SHAMap concurrently
584  parent = std::make_shared<Ledger>(*next, NetClock::time_point{});
585  loadQueue.push(
586  std::make_pair(std::move(next), std::move(accountTxData)));
587  }
588  // empty optional tells the loader to shutdown
589  loadQueue.push({});
590  }};
591 
// Load thread: flushes each built ledger to the key-value store, writes to
// the relational database (reporting builds only), and publishes it.
592  std::thread loader{[this,
593  &lastPublishedSequence,
594  &loadQueue,
595  &writeConflict]() {
596  beast::setCurrentThreadName("rippled: ReportingETL load");
597  size_t totalTransactions = 0;
598  double totalTime = 0;
599  while (!writeConflict)
600  {
604  result{loadQueue.pop()};
605  // if result is an empty optional, the transformer thread has
606  // stopped and the loader should stop as well
607  if (!result)
608  break;
609  if (isStopping())
610  continue;
611 
612  auto& ledger = result->first;
613  auto& accountTxData = result->second;
614 
615  auto start = std::chrono::system_clock::now();
616  // write to the key-value store
617  flushLedger(ledger);
618 
619  auto mid = std::chrono::system_clock::now();
620  // write to RDBMS
621  // if there is a write conflict, some other process has already
622  // written this ledger and has taken over as the ETL writer
623 #ifdef RIPPLED_REPORTING
624  if (!writeToPostgres(
625  ledger->info(), accountTxData, app_.getPgPool(), journal_))
626  writeConflict = true;
627 #endif
628 
629  auto end = std::chrono::system_clock::now();
630 
631  if (!writeConflict)
632  {
633  publishLedger(ledger);
634  lastPublishedSequence = ledger->info().seq;
635  }
636  // print some performance numbers
637  auto kvTime = ((mid - start).count()) / 1000000000.0;
638  auto relationalTime = ((end - mid).count()) / 1000000000.0;
639 
640  size_t numTxns = accountTxData.size();
641  totalTime += kvTime;
642  totalTransactions += numTxns;
// NOTE(review): this message is logged on every iteration,
// including the one where a write conflict prevented publishing.
643  JLOG(journal_.info())
644  << "Load phase of etl : "
645  << "Successfully published ledger! Ledger info: "
646  << detail::toString(ledger->info())
647  << ". txn count = " << numTxns
648  << ". key-value write time = " << kvTime
649  << ". relational write time = " << relationalTime
650  << ". key-value tps = " << numTxns / kvTime
651  << ". relational tps = " << numTxns / relationalTime
652  << ". total key-value tps = " << totalTransactions / totalTime;
653  }
654  }};
655 
656  // wait for all of the threads to stop
657  loader.join();
658  extracter.join();
659  transformer.join();
660  writing_ = false;
661 
662  JLOG(journal_.debug()) << __func__ << " : "
663  << "Stopping etl pipeline";
664 
665  return lastPublishedSequence;
666 }
667 
668 // main loop. The software begins monitoring the ledgers that are validated
669 // by the network. The member networkValidatedLedgers_ keeps track of the
670 // sequences of ledgers validated by the network. Whenever a ledger is validated
671 // by the network, the software looks for that ledger in the database. Once the
672 // ledger is found in the database, the software publishes that ledger to the
673 // ledgers stream. If a network validated ledger is not found in the database
674 // after a certain amount of time, then the software attempts to take over
675 // responsibility of the ETL process, where it writes new ledgers to the
676 // database. The software will relinquish control of the ETL process if it
677 // detects that another process has taken over ETL.
// Monitor loop for a node that is allowed to write. If no ledger is found at
// startup, an initial ledger is downloaded first. After that, each newly
// validated sequence is published from the database; when publishing fails,
// the ETL pipeline is run starting at that sequence.
// NOTE(review): several lines are not visible in this listing (the signature,
// the initial ledger lookup, the download in the startSequence_ branch, the
// call that yields mostRecentValidated, and part of the main loop condition).
678 void
680 {
681  auto ledger = std::const_pointer_cast<Ledger>(
683  if (!ledger)
684  {
685  JLOG(journal_.info()) << __func__ << " : "
686  << "Database is empty. Will download a ledger "
687  "from the network.";
688  if (startSequence_)
689  {
690  JLOG(journal_.info())
691  << __func__ << " : "
692  << "ledger sequence specified in config. "
693  << "Will begin ETL process starting with ledger "
694  << *startSequence_;
696  }
697  else
698  {
699  JLOG(journal_.info())
700  << __func__ << " : "
701  << "Waiting for next ledger to be validated by network...";
702  std::optional<uint32_t> mostRecentValidated =
704  if (mostRecentValidated)
705  {
706  JLOG(journal_.info()) << __func__ << " : "
707  << "Ledger " << *mostRecentValidated
708  << " has been validated. "
709  << "Downloading...";
710  ledger = loadInitialLedger(*mostRecentValidated);
711  }
712  else
713  {
714  JLOG(journal_.info()) << __func__ << " : "
715  << "The wait for the next validated "
716  << "ledger has been aborted. "
717  << "Exiting monitor loop";
718  return;
719  }
720  }
721  }
722  else
723  {
// a configured start sequence is rejected when a ledger already exists
724  if (startSequence_)
725  {
726  Throw<std::runtime_error>(
727  "start sequence specified but db is already populated");
728  }
729  JLOG(journal_.info())
730  << __func__ << " : "
731  << "Database already populated. Picking up from the tip of history";
732  }
// still no ledger at this point means the initial download did not produce
// one
733  if (!ledger)
734  {
735  JLOG(journal_.error())
736  << __func__ << " : "
737  << "Failed to load initial ledger. Exiting monitor loop";
738  return;
739  }
740  else
741  {
742  publishLedger(ledger);
743  }
744  uint32_t nextSequence = ledger->info().seq + 1;
745 
746  JLOG(journal_.debug()) << __func__ << " : "
747  << "Database is populated. "
748  << "Starting monitor loop. sequence = "
749  << nextSequence;
750  while (!stopping_ &&
752  {
753  JLOG(journal_.info()) << __func__ << " : "
754  << "Ledger with sequence = " << nextSequence
755  << " has been validated by the network. "
756  << "Attempting to find in database and publish";
757  // Attempt to take over responsibility of ETL writer after 10 failed
758  // attempts to publish the ledger. publishLedger() fails if the
759  // ledger that has been validated by the network is not found in the
760  // database after the specified number of attempts. publishLedger()
761  // waits one second between each attempt to read the ledger from the
762  // database
763  //
764  // In strict read-only mode, when the software fails to find a
765  // ledger in the database that has been validated by the network,
766  // the software will only try to publish subsequent ledgers once,
767  // until one of those ledgers is found in the database. Once the
768  // software successfully publishes a ledger, the software will fall
769  // back to the normal behavior of trying several times to publish
770  // the ledger that has been validated by the network. In this
771  // manner, a reporting process running in read-only mode does not
772  // need to restart if the database is wiped.
// despite its name, this value is passed to publishLedger as the maximum
// number of attempts
773  constexpr size_t timeoutSeconds = 10;
774  bool success = publishLedger(nextSequence, timeoutSeconds);
775  if (!success)
776  {
777  JLOG(journal_.warn())
778  << __func__ << " : "
779  << "Failed to publish ledger with sequence = " << nextSequence
780  << " . Beginning ETL";
781  // runETLPipeline returns the most recent sequence published,
782  // or an empty optional if no sequence was published
783  std::optional<uint32_t> lastPublished =
784  runETLPipeline(nextSequence);
785  JLOG(journal_.info()) << __func__ << " : "
786  << "Aborting ETL. Falling back to publishing";
787  // if no ledger was published, don't increment nextSequence
788  if (lastPublished)
789  nextSequence = *lastPublished + 1;
790  }
791  else
792  {
793  ++nextSequence;
794  }
795  }
796 }
797 
// Monitor loop for strict read-only mode: publishes ledgers in increasing
// sequence order and never runs the ETL pipeline. Returns immediately if no
// starting sequence is obtained.
// NOTE(review): the signature, the call that yields mostRecent, and part of
// the loop condition are not visible in this listing.
798 void
800 {
801  JLOG(journal_.debug()) << "Starting reporting in strict read only mode";
802  std::optional<uint32_t> mostRecent =
804  if (!mostRecent)
805  return;
806  uint32_t sequence = *mostRecent;
807  bool success = true;
808  while (!stopping_ &&
810  {
// After a failed publish, the next ledger gets a single attempt; after a
// successful one it gets up to 30. The sequence advances either way, so
// missing ledgers are skipped.
811  success = publishLedger(sequence, success ? 30 : 1);
812  ++sequence;
813  }
814 }
815 
// Starts the worker thread, which runs monitorReadOnly() when readOnly_ is set
// and monitor() otherwise. The thread handle is stored in worker_.
// NOTE(review): the signature line is not visible in this listing; presumably
// this is ReportingETL::doWork - confirm.
816 void
818 {
819  worker_ = std::thread([this]() {
820  beast::setCurrentThreadName("rippled: ReportingETL worker");
821  if (readOnly_)
822  monitorReadOnly();
823  else
824  monitor();
825  });
826 }
827 
// Constructor: wires up the members and, if a "reporting" section exists in
// the config, reads the ETL sources and the read_only, start_sequence,
// flush_interval and num_markers settings from it.
// NOTE(review): the signature line and several body lines are not visible in
// this listing (the add call for full sources, the command-line readOnly_
// assignment, and the body of the START_UP == FRESH branch).
829  : Stoppable("ReportingETL", parent)
830  , app_(app)
831  , journal_(app.journal("ReportingETL"))
832  , publishStrand_(app_.getIOService())
833  , loadBalancer_(*this)
834 {
835  // if present, get endpoint from config
836  if (app_.config().exists("reporting"))
837  {
838  Section section = app_.config().section("reporting");
839 
840  JLOG(journal_.debug()) << "Parsing config info";
841 
// each value in the section names another config section that
// describes one source
842  auto& vals = section.values();
843  for (auto& v : vals)
844  {
845  JLOG(journal_.debug()) << "val is " << v;
846  Section source = app_.config().section(v);
847 
// sources without an ip or websocket port are skipped silently
848  std::pair<std::string, bool> ipPair = source.find("source_ip");
849  if (!ipPair.second)
850  continue;
851 
852  std::pair<std::string, bool> wsPortPair =
853  source.find("source_ws_port");
854  if (!wsPortPair.second)
855  continue;
856 
857  std::pair<std::string, bool> grpcPortPair =
858  source.find("source_grpc_port");
859  if (!grpcPortPair.second)
860  {
861  // add source without grpc port
862  // used in read-only mode to detect when new ledgers have
863  // been validated. Used for publishing
864  if (app_.config().reportingReadOnly())
865  loadBalancer_.add(ipPair.first, wsPortPair.first);
866  continue;
867  }
868 
870  ipPair.first, wsPortPair.first, grpcPortPair.first);
871  }
872 
873  // this is true iff --reportingReadOnly was passed via command line
875 
876  // if --reportingReadOnly was not passed via command line, check config
877  // file. Command line takes precedence
878  if (!readOnly_)
879  {
880  std::pair<std::string, bool> ro = section.find("read_only");
881  if (ro.second)
882  {
// only the exact strings "true" and "1" enable read-only mode
883  readOnly_ = (ro.first == "true" || ro.first == "1");
885  }
886  }
887 
888  // handle command line arguments
889  if (app_.config().START_UP == Config::StartUpType::FRESH && !readOnly_)
890  {
892  }
893  // if not passed via command line, check config for start sequence
894  if (!startSequence_)
895  {
896  std::pair<std::string, bool> start = section.find("start_sequence");
897  if (start.second)
898  {
// NOTE(review): std::stoi throws on non-numeric input and on
// values outside the range of int, so a malformed config value
// throws from this constructor, and sequences above INT_MAX
// cannot be parsed. The same applies to flush_interval and
// num_markers below.
899  startSequence_ = std::stoi(start.first);
900  }
901  }
902 
903  std::pair<std::string, bool> flushInterval =
904  section.find("flush_interval");
905  if (flushInterval.second)
906  flushInterval_ = std::stoi(flushInterval.first);
907 
908  std::pair<std::string, bool> numMarkers = section.find("num_markers");
909  if (numMarkers.second)
910  numMarkers_ = std::stoi(numMarkers.first);
911  }
912 }
913 
914 } // namespace ripple
ripple::NetworkOPs::pubLedger
virtual void pubLedger(std::shared_ptr< ReadView const > const &lpAccepted)=0
beast::Journal::fatal
Stream fatal() const
Definition: Journal.h:339
ripple::ReportingETL::flushInterval_
size_t flushInterval_
Used to determine when to write to the database during the initial ledger download.
Definition: ReportingETL.h:115
ripple::Section
Holds a collection of configuration values.
Definition: BasicConfig.h:43
ripple::Application
Definition: Application.h:101
ripple::Application::getNodeFamily
virtual Family & getNodeFamily()=0
ripple::HashPrefix::ledgerMaster
@ ledgerMaster
ledger master data for signing
std::this_thread::sleep_for
T sleep_for(T... args)
ripple::makeSlice
std::enable_if_t< std::is_same< T, char >::value||std::is_same< T, unsigned char >::value, Slice > makeSlice(std::array< T, N > const &a)
Definition: Slice.h:240
ripple::ReportingETL::fetchLedgerData
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedgerData(uint32_t sequence)
Extract data for a particular ledger from an ETL source.
Definition: ReportingETL.cpp:348
ripple::ReportingETL::loadInitialLedger
std::shared_ptr< Ledger > loadInitialLedger(uint32_t sequence)
Download a ledger with specified sequence in full, via GetLedgerData, and write the data to the datab...
Definition: ReportingETL.cpp:105
ripple::ReportingETL::startSequence_
std::optional< uint32_t > startSequence_
Ledger sequence to start ETL from.
Definition: ReportingETL.h:139
std::string
STL class.
std::shared_ptr< Ledger >
ripple::LedgerInfo::parentHash
uint256 parentHash
Definition: ReadView.h:103
ripple::ThreadSafeQueue
Generic thread-safe queue with an optional maximum size Note, we can't use a lockfree queue here,...
Definition: ETLHelpers.h:105
beast::Journal::trace
Stream trace() const
Severity stream access functions.
Definition: Journal.h:309
ripple::ReportingETL::ReportingETL
ReportingETL(Application &app, Stoppable &parent)
Definition: ReportingETL.cpp:828
ripple::Serializer::modData
Blob & modData()
Definition: Serializer.h:176
std::pair
ripple::ReportingETL::setLastPublish
void setLastPublish()
Definition: ReportingETL.h:155
ripple::LedgerInfo::hash
uint256 hash
Definition: ReadView.h:100
ripple::ReportingETL::insertTransactions
std::vector< AccountTransactionsData > insertTransactions(std::shared_ptr< Ledger > &ledger, org::xrpl::rpc::v1::GetLedgerResponse &data)
Insert all of the extracted transactions into the ledger.
Definition: ReportingETL.cpp:74
ripple::ThreadSafeQueue::push
void push(T const &elt)
Definition: ETLHelpers.h:126
ripple::addRaw
void addRaw(LedgerInfo const &info, Serializer &s, bool includeHash)
Definition: View.cpp:43
ripple::ReportingETL::flushLedger
void flushLedger(std::shared_ptr< Ledger > &ledger)
Write all new data to the key-value store.
Definition: ReportingETL.cpp:180
ripple::hotACCOUNT_NODE
@ hotACCOUNT_NODE
Definition: NodeObject.h:35
std::vector
STL class.
ripple::ReportingETL::loadBalancer_
ETLLoadBalancer loadBalancer_
Mechanism for communicating with ETL sources.
Definition: ReportingETL.h:96
std::chrono::seconds
ripple::ETLLoadBalancer::fetchLedger
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedger(uint32_t ledgerSequence, bool getObjects)
Fetch data for a specific ledger.
Definition: ETLSource.cpp:665
ripple::NetworkValidatedLedgers::waitUntilValidatedByNetwork
bool waitUntilValidatedByNetwork(uint32_t sequence)
Waits for the sequence to be validated by the network.
Definition: ETLHelpers.h:79
ripple::NodeStore::Database::sync
virtual void sync()=0
std::stringstream
STL class.
beast::Journal::warn
Stream warn() const
Definition: Journal.h:327
ripple::NodeStore::Database::store
virtual void store(NodeObjectType type, Blob &&data, uint256 const &hash, std::uint32_t ledgerSeq)=0
Store the object.
ripple::LedgerInfo::seq
LedgerIndex seq
Definition: ReadView.h:92
ripple::hotTRANSACTION_NODE
@ hotTRANSACTION_NODE
Definition: NodeObject.h:36
iostream
ripple::LedgerInfo::txHash
uint256 txHash
Definition: ReadView.h:101
ripple::Application::getOPs
virtual NetworkOPs & getOPs()=0
ripple::Section::values
std::vector< std::string > const & values() const
Returns all the values in the section.
Definition: BasicConfig.h:76
ripple::ReportingETL::networkValidatedLedgers_
NetworkValidatedLedgers networkValidatedLedgers_
Mechanism for detecting when the network has validated a new ledger.
Definition: ReportingETL.h:100
ripple::TxMeta
Definition: TxMeta.h:32
ripple::base_uint< 256 >
ripple::ReportingETL::publishLedger
bool publishLedger(uint32_t ledgerSequence, uint32_t maxAttempts=10)
Attempt to read the specified ledger from the database, and then publish that ledger to the ledgers stream.
Definition: ReportingETL.cpp:281
ripple::ReportingETL::journal_
beast::Journal journal_
Definition: ReportingETL.h:75
std::stol
T stol(T... args)
ripple::Config::reporting
bool reporting() const
Definition: Config.h:267
ripple::Stoppable
Provides an interface for starting and stopping.
Definition: Stoppable.h:201
std::thread
STL class.
ripple::ReportingETL::isStopping
bool isStopping()
Definition: ReportingETL.h:282
ripple::Application::getLedgerMaster
virtual LedgerMaster & getLedgerMaster()=0
ripple::ReportingETL::writing_
std::atomic_bool writing_
Whether the process is writing to the database. Used by server_info.
Definition: ReportingETL.h:134
ripple::ReportingETL::readOnly_
bool readOnly_
Whether the process is in strict read-only mode.
Definition: ReportingETL.h:131
ripple::ReportingETL::numMarkers_
size_t numMarkers_
This variable controls the number of GetLedgerData calls that will be executed in parallel during the...
Definition: ReportingETL.h:126
ripple::ETLLoadBalancer::loadInitialLedger
void loadInitialLedger(uint32_t sequence, ThreadSafeQueue< std::shared_ptr< SLE >> &writeQueue)
Load the initial ledger, writing data to the queue.
Definition: ETLSource.cpp:646
ripple::Application::config
virtual Config & config()=0
ripple::ReportingETL::monitorReadOnly
void monitorReadOnly()
Monitor the database for newly written ledgers.
Definition: ReportingETL.cpp:799
ripple::detail::toString
std::string toString(LedgerInfo const &info)
Convenience function for printing out basic ledger info.
Definition: ReportingETL.cpp:40
beast::Journal::error
Stream error() const
Definition: Journal.h:333
beast::Journal::info
Stream info() const
Definition: Journal.h:321
std::chrono::time_point
ripple::ReportingETL::publishStrand_
boost::asio::io_context::strand publishStrand_
Strand to ensure that ledgers are published in order.
Definition: ReportingETL.h:91
ripple::STTx
Definition: STTx.h:42
ripple::LedgerMaster::getLedgerBySeq
std::shared_ptr< Ledger const > getLedgerBySeq(std::uint32_t index)
Definition: LedgerMaster.cpp:1740
ripple::SerialIter
Definition: Serializer.h:308
ripple::Config::START_UP
StartUpType START_UP
Definition: Config.h:129
std::atomic_bool
ripple::ReportingETL::fetchLedgerDataAndDiff
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedgerDataAndDiff(uint32_t sequence)
Extract data for a particular ledger from an ETL source.
Definition: ReportingETL.cpp:362
ripple::NetworkValidatedLedgers::getMostRecent
std::optional< uint32_t > getMostRecent()
Get most recently validated sequence.
Definition: ETLHelpers.h:67
ripple::Section::find
std::pair< std::string, bool > find(std::string const &name) const
Retrieve a key/value pair.
Definition: BasicConfig.cpp:113
ripple::Serializer
Definition: Serializer.h:39
ripple::Config::setReportingReadOnly
void setReportingReadOnly(bool b)
Definition: Config.h:285
beast::setCurrentThreadName
void setCurrentThreadName(std::string_view name)
Changes the name of the caller thread.
Definition: CurrentThreadName.cpp:119
std::vector::emplace_back
T emplace_back(T... args)
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
ripple::ReportingETL::stopping_
std::atomic_bool stopping_
Whether the software is stopping.
Definition: ReportingETL.h:103
ripple::Application::getNodeStore
virtual NodeStore::Database & getNodeStore()=0
ripple::deserializeHeader
LedgerInfo deserializeHeader(Slice data, bool hasHash)
Deserialize a ledger header from a byte array.
Definition: InboundLedger.cpp:277
cstdlib
ripple::base_uint< 256 >::fromVoid
static base_uint fromVoid(void const *data)
Definition: base_uint.h:223
ripple::LedgerMaster::getValidatedLedger
std::shared_ptr< Ledger const > getValidatedLedger()
Definition: LedgerMaster.cpp:1581
ripple::ReportingETL::app_
Application & app_
Definition: ReportingETL.h:73
std::optional
std::stringstream::str
T str(T... args)
beast::Journal::debug
Stream debug() const
Definition: Journal.h:315
ripple::ReportingETL::worker_
std::thread worker_
Definition: ReportingETL.h:77
ripple::hotLEDGER
@ hotLEDGER
Definition: NodeObject.h:34
std::make_pair
T make_pair(T... args)
ripple::Serializer::add32
int add32(std::uint32_t i)
Definition: Serializer.cpp:38
ripple::LedgerInfo
Information about the notional ledger backing the view.
Definition: ReadView.h:84
ripple::strHex
std::string strHex(FwdIt begin, FwdIt end)
Definition: strHex.h:45
ripple::Config::reportingReadOnly
bool reportingReadOnly() const
Definition: Config.h:279
ripple::ReportingETL::buildNextLedger
std::pair< std::shared_ptr< Ledger >, std::vector< AccountTransactionsData > > buildNextLedger(std::shared_ptr< Ledger > &parent, org::xrpl::rpc::v1::GetLedgerResponse &rawData)
Build the next ledger using the previous ledger and the extracted data.
Definition: ReportingETL.cpp:376
ripple::ReportingETL::monitor
void monitor()
Monitor the network for newly validated ledgers.
Definition: ReportingETL.cpp:679
ripple::Config::START_LEDGER
std::string START_LEDGER
Definition: Config.h:133
ripple::ReportingETL::consumeLedgerData
void consumeLedgerData(std::shared_ptr< Ledger > &ledger, ThreadSafeQueue< std::shared_ptr< SLE >> &writeQueue)
Consume data from a queue and insert that data into the ledger. This function will continue to pull fr...
Definition: ReportingETL.cpp:52
ripple::ReportingETL::doWork
void doWork()
Definition: ReportingETL.cpp:817
ripple::LedgerInfo::accountHash
uint256 accountHash
Definition: ReadView.h:102
ripple::ReportingETL::runETLPipeline
std::optional< uint32_t > runETLPipeline(uint32_t startSequence)
Run ETL.
Definition: ReportingETL.cpp:459
ripple::BasicConfig::exists
bool exists(std::string const &name) const
Returns true if a section with the given name exists.
Definition: BasicConfig.cpp:132
ripple::BasicConfig::section
Section & section(std::string const &name)
Returns the section with the given name.
Definition: BasicConfig.cpp:138
variant
string
ripple::ETLLoadBalancer::add
void add(std::string &host, std::string &websocketPort, std::string &grpcPort)
Add an ETL source.
Definition: ETLSource.cpp:623
std::chrono::system_clock::now
T now(T... args)