rippled
ReportingETL.cpp
1 //------------------------------------------------------------------------------
2 /*
3  This file is part of rippled: https://github.com/ripple/rippled
4  Copyright (c) 2020 Ripple Labs Inc.
5 
6  Permission to use, copy, modify, and/or distribute this software for any
7  purpose with or without fee is hereby granted, provided that the above
8  copyright notice and this permission notice appear in all copies.
9 
10  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 //==============================================================================
19 
20 #include <ripple/app/rdb/backend/PostgresDatabase.h>
21 #include <ripple/app/reporting/ReportingETL.h>
22 #include <ripple/basics/ThreadUtilities.h>
23 #include <ripple/json/json_reader.h>
24 #include <ripple/json/json_writer.h>
25 #include <boost/asio/connect.hpp>
26 #include <boost/asio/ip/tcp.hpp>
27 #include <boost/beast/core.hpp>
28 #include <boost/beast/websocket.hpp>
29 #include <cctype>
30 #include <charconv>
31 #include <cstdlib>
32 #include <iostream>
33 #include <string>
34 #include <variant>
35 
36 namespace ripple {
37 
38 namespace detail {
41 toString(LedgerInfo const& info)
42 {
44  ss << "LedgerInfo { Sequence : " << info.seq
45  << " Hash : " << strHex(info.hash) << " TxHash : " << strHex(info.txHash)
46  << " AccountHash : " << strHex(info.accountHash)
47  << " ParentHash : " << strHex(info.parentHash) << " }";
48  return ss.str();
49 }
50 } // namespace detail
51 
52 void
56 {
58  size_t num = 0;
59  while (!stopping_ && (sle = writeQueue.pop()))
60  {
61  assert(sle);
62  if (!ledger->exists(sle->key()))
63  ledger->rawInsert(sle);
64 
65  if (flushInterval_ != 0 && (num % flushInterval_) == 0)
66  {
67  JLOG(journal_.debug()) << "Flushing! key = " << strHex(sle->key());
68  ledger->stateMap().flushDirty(hotACCOUNT_NODE);
69  }
70  ++num;
71  }
72 }
73 
77  org::xrpl::rpc::v1::GetLedgerResponse& data)
78 {
80  for (auto& txn : data.transactions_list().transactions())
81  {
82  auto& raw = txn.transaction_blob();
83 
84  SerialIter it{raw.data(), raw.size()};
85  STTx sttx{it};
86 
87  auto txSerializer = std::make_shared<Serializer>(sttx.getSerializer());
88 
89  TxMeta txMeta{
90  sttx.getTransactionID(), ledger->info().seq, txn.metadata_blob()};
91 
92  auto metaSerializer =
93  std::make_shared<Serializer>(txMeta.getAsObject().getSerializer());
94 
95  JLOG(journal_.trace())
96  << __func__ << " : "
97  << "Inserting transaction = " << sttx.getTransactionID();
98  uint256 nodestoreHash = ledger->rawTxInsertWithHash(
99  sttx.getTransactionID(), txSerializer, metaSerializer);
100  accountTxData.emplace_back(txMeta, std::move(nodestoreHash), journal_);
101  }
102  return accountTxData;
103 }
104 
106 ReportingETL::loadInitialLedger(uint32_t startingSequence)
107 {
108  // check that database is actually empty
109  auto ledger = std::const_pointer_cast<Ledger>(
111  if (ledger)
112  {
113  JLOG(journal_.fatal()) << __func__ << " : "
114  << "Database is not empty";
115  assert(false);
116  return {};
117  }
118 
119  // fetch the ledger from the network. This function will not return until
120  // either the fetch is successful, or the server is being shutdown. This
121  // only fetches the ledger header and the transactions+metadata
123  fetchLedgerData(startingSequence)};
124  if (!ledgerData)
125  return {};
126 
127  LedgerInfo lgrInfo =
128  deserializeHeader(makeSlice(ledgerData->ledger_header()), true);
129 
130  JLOG(journal_.debug()) << __func__ << " : "
131  << "Deserialized ledger header. "
132  << detail::toString(lgrInfo);
133 
134  ledger =
135  std::make_shared<Ledger>(lgrInfo, app_.config(), app_.getNodeFamily());
136  ledger->stateMap().clearSynching();
137  ledger->txMap().clearSynching();
138 
139 #ifdef RIPPLED_REPORTING
141  insertTransactions(ledger, *ledgerData);
142 #endif
143 
145 
147  std::thread asyncWriter{[this, &ledger, &writeQueue]() {
148  consumeLedgerData(ledger, writeQueue);
149  }};
150 
151  // download the full account state map. This function downloads full ledger
152  // data and pushes the downloaded data into the writeQueue. asyncWriter
153  // consumes from the queue and inserts the data into the Ledger object.
154  // Once the below call returns, all data has been pushed into the queue
155  loadBalancer_.loadInitialLedger(startingSequence, writeQueue);
156 
157  // null is used to represent the end of the queue
159  writeQueue.push(null);
160  // wait for the writer to finish
161  asyncWriter.join();
162 
163  if (!stopping_)
164  {
165  flushLedger(ledger);
166  if (app_.config().reporting())
167  {
168 #ifdef RIPPLED_REPORTING
169  dynamic_cast<PostgresDatabase*>(&app_.getRelationalDatabase())
170  ->writeLedgerAndTransactions(ledger->info(), accountTxData);
171 #endif
172  }
173  }
174  auto end = std::chrono::system_clock::now();
175  JLOG(journal_.debug()) << "Time to download and store ledger = "
176  << ((end - start).count()) / 1000000000.0;
177  return ledger;
178 }
179 
180 void
182 {
183  JLOG(journal_.debug()) << __func__ << " : "
184  << "Flushing ledger. "
185  << detail::toString(ledger->info());
186  // These are recomputed in setImmutable
187  auto& accountHash = ledger->info().accountHash;
188  auto& txHash = ledger->info().txHash;
189  auto& ledgerHash = ledger->info().hash;
190 
191  assert(
192  ledger->info().seq < XRP_LEDGER_EARLIEST_FEES ||
193  ledger->read(keylet::fees()));
194  ledger->setImmutable(false);
196 
197  auto numFlushed = ledger->stateMap().flushDirty(hotACCOUNT_NODE);
198 
199  auto numTxFlushed = ledger->txMap().flushDirty(hotTRANSACTION_NODE);
200 
201  {
202  Serializer s(128);
204  addRaw(ledger->info(), s);
206  hotLEDGER,
207  std::move(s.modData()),
208  ledger->info().hash,
209  ledger->info().seq);
210  }
211 
212  app_.getNodeStore().sync();
213 
214  auto end = std::chrono::system_clock::now();
215 
216  JLOG(journal_.debug()) << __func__ << " : "
217  << "Flushed " << numFlushed
218  << " nodes to nodestore from stateMap";
219  JLOG(journal_.debug()) << __func__ << " : "
220  << "Flushed " << numTxFlushed
221  << " nodes to nodestore from txMap";
222 
223  JLOG(journal_.debug()) << __func__ << " : "
224  << "Flush took "
225  << (end - start).count() / 1000000000.0
226  << " seconds";
227 
228  if (numFlushed == 0)
229  {
230  JLOG(journal_.fatal()) << __func__ << " : "
231  << "Flushed 0 nodes from state map";
232  assert(false);
233  }
234  if (numTxFlushed == 0)
235  {
236  JLOG(journal_.warn()) << __func__ << " : "
237  << "Flushed 0 nodes from tx map";
238  }
239 
240  // Make sure calculated hashes are correct
241  if (ledger->stateMap().getHash().as_uint256() != accountHash)
242  {
243  JLOG(journal_.fatal())
244  << __func__ << " : "
245  << "State map hash does not match. "
246  << "Expected hash = " << strHex(accountHash) << "Actual hash = "
247  << strHex(ledger->stateMap().getHash().as_uint256());
248  Throw<std::runtime_error>("state map hash mismatch");
249  }
250 
251  if (ledger->txMap().getHash().as_uint256() != txHash)
252  {
253  JLOG(journal_.fatal())
254  << __func__ << " : "
255  << "Tx map hash does not match. "
256  << "Expected hash = " << strHex(txHash) << "Actual hash = "
257  << strHex(ledger->txMap().getHash().as_uint256());
258  Throw<std::runtime_error>("tx map hash mismatch");
259  }
260 
261  if (ledger->info().hash != ledgerHash)
262  {
263  JLOG(journal_.fatal())
264  << __func__ << " : "
265  << "Ledger hash does not match. "
266  << "Expected hash = " << strHex(ledgerHash)
267  << "Actual hash = " << strHex(ledger->info().hash);
268  Throw<std::runtime_error>("ledger hash mismatch");
269  }
270 
271  JLOG(journal_.info()) << __func__ << " : "
272  << "Successfully flushed ledger! "
273  << detail::toString(ledger->info());
274 }
275 
276 void
278 {
279  app_.getOPs().pubLedger(ledger);
280 
281  setLastPublish();
282 }
283 
284 bool
285 ReportingETL::publishLedger(uint32_t ledgerSequence, uint32_t maxAttempts)
286 {
287  JLOG(journal_.info()) << __func__ << " : "
288  << "Attempting to publish ledger = "
289  << ledgerSequence;
290  size_t numAttempts = 0;
291  while (!stopping_)
292  {
293  auto ledger = app_.getLedgerMaster().getLedgerBySeq(ledgerSequence);
294 
295  if (!ledger)
296  {
297  JLOG(journal_.warn())
298  << __func__ << " : "
299  << "Trying to publish. Could not find ledger with sequence = "
300  << ledgerSequence;
301  // We try maxAttempts times to publish the ledger, waiting one
302  // second in between each attempt.
303  // If the ledger is not present in the database after maxAttempts,
304  // we attempt to take over as the writer. If the takeover fails,
305  // doContinuousETL will return, and this node will go back to
306  // publishing.
307  // If the node is in strict read only mode, we simply
308  // skip publishing this ledger and return false indicating the
309  // publish failed
310  if (numAttempts >= maxAttempts)
311  {
312  JLOG(journal_.error()) << __func__ << " : "
313  << "Failed to publish ledger after "
314  << numAttempts << " attempts.";
315  if (!readOnly_)
316  {
317  JLOG(journal_.info()) << __func__ << " : "
318  << "Attempting to become ETL writer";
319  return false;
320  }
321  else
322  {
323  JLOG(journal_.debug())
324  << __func__ << " : "
325  << "In strict read-only mode. "
326  << "Skipping publishing this ledger. "
327  << "Beginning fast forward.";
328  return false;
329  }
330  }
331  else
332  {
334  ++numAttempts;
335  }
336  continue;
337  }
338 
339  publishStrand_.post([this, ledger, fname = __func__]() {
340  app_.getOPs().pubLedger(ledger);
341  setLastPublish();
342  JLOG(journal_.info())
343  << fname << " : "
344  << "Published ledger. " << detail::toString(ledger->info());
345  });
346  return true;
347  }
348  return false;
349 }
350 
353 {
354  JLOG(journal_.debug()) << __func__ << " : "
355  << "Attempting to fetch ledger with sequence = "
356  << idx;
357 
359  loadBalancer_.fetchLedger(idx, false);
360  JLOG(journal_.trace()) << __func__ << " : "
361  << "GetLedger reply = " << response->DebugString();
362  return response;
363 }
364 
367 {
368  JLOG(journal_.debug()) << __func__ << " : "
369  << "Attempting to fetch ledger with sequence = "
370  << idx;
371 
373  loadBalancer_.fetchLedger(idx, true);
374  JLOG(journal_.trace()) << __func__ << " : "
375  << "GetLedger reply = " << response->DebugString();
376  return response;
377 }
378 
382  org::xrpl::rpc::v1::GetLedgerResponse& rawData)
383 {
384  JLOG(journal_.info()) << __func__ << " : "
385  << "Beginning ledger update";
386 
387  LedgerInfo lgrInfo =
388  deserializeHeader(makeSlice(rawData.ledger_header()), true);
389 
390  JLOG(journal_.debug()) << __func__ << " : "
391  << "Deserialized ledger header. "
392  << detail::toString(lgrInfo);
393 
394  next->setLedgerInfo(lgrInfo);
395 
396  next->stateMap().clearSynching();
397  next->txMap().clearSynching();
398 
400  insertTransactions(next, rawData)};
401 
402  JLOG(journal_.debug())
403  << __func__ << " : "
404  << "Inserted all transactions. Number of transactions = "
405  << rawData.transactions_list().transactions_size();
406 
407  for (auto& obj : rawData.ledger_objects().objects())
408  {
409  auto key = uint256::fromVoidChecked(obj.key());
410  if (!key)
411  throw std::runtime_error("Recevied malformed object ID");
412 
413  auto& data = obj.data();
414 
415  // indicates object was deleted
416  if (data.size() == 0)
417  {
418  JLOG(journal_.trace()) << __func__ << " : "
419  << "Erasing object = " << *key;
420  if (next->exists(*key))
421  next->rawErase(*key);
422  }
423  else
424  {
425  SerialIter it{data.data(), data.size()};
426  std::shared_ptr<SLE> sle = std::make_shared<SLE>(it, *key);
427 
428  if (next->exists(*key))
429  {
430  JLOG(journal_.trace()) << __func__ << " : "
431  << "Replacing object = " << *key;
432  next->rawReplace(sle);
433  }
434  else
435  {
436  JLOG(journal_.trace()) << __func__ << " : "
437  << "Inserting object = " << *key;
438  next->rawInsert(sle);
439  }
440  }
441  }
442  JLOG(journal_.debug())
443  << __func__ << " : "
444  << "Inserted/modified/deleted all objects. Number of objects = "
445  << rawData.ledger_objects().objects_size();
446 
447  if (!rawData.skiplist_included())
448  {
449  next->updateSkipList();
450  JLOG(journal_.warn())
451  << __func__ << " : "
452  << "tx process is not sending skiplist. This indicates that the tx "
453  "process is parsing metadata instead of doing a SHAMap diff. "
454  "Make sure tx process is running the same code as reporting to "
455  "use SHAMap diff instead of parsing metadata";
456  }
457 
458  JLOG(journal_.debug()) << __func__ << " : "
459  << "Finished ledger update. "
460  << detail::toString(next->info());
461  return {std::move(next), std::move(accountTxData)};
462 }
463 
464 // Database must be populated when this starts
466 ReportingETL::runETLPipeline(uint32_t startSequence)
467 {
468  /*
469  * Behold, mortals! This function spawns three separate threads, which talk
470  * to each other via 2 different thread safe queues and 1 atomic variable.
471  * All threads and queues are function local. This function returns when all
472  * of the threads exit. There are two termination conditions: the first is
473  * if the load thread encounters a write conflict. In this case, the load
474  * thread sets writeConflict, an atomic bool, to true, which signals the
475  * other threads to stop. The second termination condition is when the
476  * entire server is shutting down, which is detected in one of three ways:
477  * 1. isStopping() returns true if the server is shutting down
478  * 2. networkValidatedLedgers_.waitUntilValidatedByNetwork returns
479  * false, signaling the wait was aborted.
480  * 3. fetchLedgerDataAndDiff returns an empty optional, signaling the fetch
481  * was aborted.
482  * In all cases, the extract thread detects this condition,
483  * and pushes an empty optional onto the transform queue. The transform
484  * thread, upon popping an empty optional, pushes an empty optional onto the
485  * load queue, and then returns. The load thread, upon popping an empty
486  * optional, returns.
487  */
488 
489  JLOG(journal_.debug()) << __func__ << " : "
490  << "Starting etl pipeline";
491  writing_ = true;
492 
493  std::shared_ptr<Ledger> parent = std::const_pointer_cast<Ledger>(
494  app_.getLedgerMaster().getLedgerBySeq(startSequence - 1));
495  if (!parent)
496  {
497  assert(false);
498  Throw<std::runtime_error>("runETLPipeline: parent ledger is null");
499  }
500 
501  std::atomic_bool writeConflict = false;
502  std::optional<uint32_t> lastPublishedSequence;
503  constexpr uint32_t maxQueueSize = 1000;
504 
506  transformQueue{maxQueueSize};
507 
508  std::thread extracter{[this,
509  &startSequence,
510  &writeConflict,
511  &transformQueue]() {
512  this_thread::set_name("ETL extract");
513  uint32_t currentSequence = startSequence;
514 
515  // there are two stopping conditions here.
516  // First, if there is a write conflict in the load thread, the ETL
517  // mechanism should stop.
518  // The other stopping condition is if the entire server is shutting
519  // down. This can be detected in a variety of ways. See the comment
520  // at the top of the function
522  currentSequence) &&
523  !writeConflict && !isStopping())
524  {
527  fetchLedgerDataAndDiff(currentSequence)};
528  // if the fetch is unsuccessful, stop. fetchLedgerDataAndDiff only
529  // returns an empty optional if the server is shutting down, or if
530  // the ledger was found in the database (which means another
531  // process already wrote the ledger that this process was trying
532  // to extract; this is a form of a write conflict). Otherwise,
533  // fetchLedgerDataAndDiff will keep trying to fetch the
534  // specified ledger until successful
535  if (!fetchResponse)
536  {
537  break;
538  }
539  auto end = std::chrono::system_clock::now();
540 
541  auto time = ((end - start).count()) / 1000000000.0;
542  auto tps =
543  fetchResponse->transactions_list().transactions_size() / time;
544 
545  JLOG(journal_.debug()) << "Extract phase time = " << time
546  << " . Extract phase tps = " << tps;
547 
548  transformQueue.push(std::move(fetchResponse));
549  ++currentSequence;
550  }
551  // empty optional tells the transformer to shut down
552  transformQueue.push({});
553  }};
554 
558  loadQueue{maxQueueSize};
559  std::thread transformer{[this,
560  &parent,
561  &writeConflict,
562  &loadQueue,
563  &transformQueue]() {
564  this_thread::set_name("ETL transform");
565 
566  assert(parent);
567  parent = std::make_shared<Ledger>(*parent, NetClock::time_point{});
568  while (!writeConflict)
569  {
571  transformQueue.pop()};
572  // if fetchResponse is an empty optional, the extracter thread has
573  // stopped and the transformer should stop as well
574  if (!fetchResponse)
575  {
576  break;
577  }
578  if (isStopping())
579  continue;
580 
582  auto [next, accountTxData] =
583  buildNextLedger(parent, *fetchResponse);
584  auto end = std::chrono::system_clock::now();
585 
586  auto duration = ((end - start).count()) / 1000000000.0;
587  JLOG(journal_.debug()) << "transform time = " << duration;
588  // The below line needs to execute before pushing to the queue, in
589  // order to prevent this thread and the loader thread from accessing
590  // the same SHAMap concurrently
591  parent = std::make_shared<Ledger>(*next, NetClock::time_point{});
592  loadQueue.push(
593  std::make_pair(std::move(next), std::move(accountTxData)));
594  }
595  // empty optional tells the loader to shutdown
596  loadQueue.push({});
597  }};
598 
599  std::thread loader{[this,
600  &lastPublishedSequence,
601  &loadQueue,
602  &writeConflict]() {
603  this_thread::set_name("ETL load");
604  size_t totalTransactions = 0;
605  double totalTime = 0;
606  while (!writeConflict)
607  {
611  result{loadQueue.pop()};
612  // if result is an empty optional, the transformer thread has
613  // stopped and the loader should stop as well
614  if (!result)
615  break;
616  if (isStopping())
617  continue;
618 
619  auto& ledger = result->first;
620  auto& accountTxData = result->second;
621 
623  // write to the key-value store
624  flushLedger(ledger);
625 
626  auto mid = std::chrono::system_clock::now();
627  // write to RDBMS
628  // if there is a write conflict, some other process has already
629  // written this ledger and has taken over as the ETL writer
630 #ifdef RIPPLED_REPORTING
631  if (!dynamic_cast<PostgresDatabase*>(&app_.getRelationalDatabase())
633  ledger->info(), accountTxData))
634  writeConflict = true;
635 #endif
636  auto end = std::chrono::system_clock::now();
637 
638  if (!writeConflict)
639  {
640  publishLedger(ledger);
641  lastPublishedSequence = ledger->info().seq;
642  }
643  // print some performance numbers
644  auto kvTime = ((mid - start).count()) / 1000000000.0;
645  auto relationalTime = ((end - mid).count()) / 1000000000.0;
646 
647  size_t numTxns = accountTxData.size();
648  totalTime += kvTime;
649  totalTransactions += numTxns;
650  JLOG(journal_.info())
651  << "Load phase of etl : "
652  << "Successfully published ledger! Ledger info: "
653  << detail::toString(ledger->info())
654  << ". txn count = " << numTxns
655  << ". key-value write time = " << kvTime
656  << ". relational write time = " << relationalTime
657  << ". key-value tps = " << numTxns / kvTime
658  << ". relational tps = " << numTxns / relationalTime
659  << ". total key-value tps = " << totalTransactions / totalTime;
660  }
661  }};
662 
663  // wait for all of the threads to stop
664  loader.join();
665  extracter.join();
666  transformer.join();
667  writing_ = false;
668 
669  JLOG(journal_.debug()) << __func__ << " : "
670  << "Stopping etl pipeline";
671 
672  return lastPublishedSequence;
673 }
674 
675 // main loop. The software begins monitoring the ledgers that are validated
676 // by the network. The member networkValidatedLedgers_ keeps track of the
677 // sequences of ledgers validated by the network. Whenever a ledger is validated
678 // by the network, the software looks for that ledger in the database. Once the
679 // ledger is found in the database, the software publishes that ledger to the
680 // ledgers stream. If a network validated ledger is not found in the database
681 // after a certain amount of time, then the software attempts to take over
682 // responsibility of the ETL process, where it writes new ledgers to the
683 // database. The software will relinquish control of the ETL process if it
684 // detects that another process has taken over ETL.
685 void
687 {
688  auto ledger = std::const_pointer_cast<Ledger>(
690  if (!ledger)
691  {
692  JLOG(journal_.info()) << __func__ << " : "
693  << "Database is empty. Will download a ledger "
694  "from the network.";
695  if (startSequence_)
696  {
697  JLOG(journal_.info())
698  << __func__ << " : "
699  << "ledger sequence specified in config. "
700  << "Will begin ETL process starting with ledger "
701  << *startSequence_;
703  }
704  else
705  {
706  JLOG(journal_.info())
707  << __func__ << " : "
708  << "Waiting for next ledger to be validated by network...";
709  std::optional<uint32_t> mostRecentValidated =
711  if (mostRecentValidated)
712  {
713  JLOG(journal_.info()) << __func__ << " : "
714  << "Ledger " << *mostRecentValidated
715  << " has been validated. "
716  << "Downloading...";
717  ledger = loadInitialLedger(*mostRecentValidated);
718  }
719  else
720  {
721  JLOG(journal_.info()) << __func__ << " : "
722  << "The wait for the next validated "
723  << "ledger has been aborted. "
724  << "Exiting monitor loop";
725  return;
726  }
727  }
728  }
729  else
730  {
731  if (startSequence_)
732  {
733  Throw<std::runtime_error>(
734  "start sequence specified but db is already populated");
735  }
736  JLOG(journal_.info())
737  << __func__ << " : "
738  << "Database already populated. Picking up from the tip of history";
739  }
740  if (!ledger)
741  {
742  JLOG(journal_.error())
743  << __func__ << " : "
744  << "Failed to load initial ledger. Exiting monitor loop";
745  return;
746  }
747  else
748  {
749  publishLedger(ledger);
750  }
751  uint32_t nextSequence = ledger->info().seq + 1;
752 
753  JLOG(journal_.debug()) << __func__ << " : "
754  << "Database is populated. "
755  << "Starting monitor loop. sequence = "
756  << nextSequence;
757  while (!stopping_ &&
759  {
760  JLOG(journal_.info()) << __func__ << " : "
761  << "Ledger with sequence = " << nextSequence
762  << " has been validated by the network. "
763  << "Attempting to find in database and publish";
764  // Attempt to take over responsibility of ETL writer after 10 failed
765  // attempts to publish the ledger. publishLedger() fails if the
766  // ledger that has been validated by the network is not found in the
767  // database after the specified number of attempts. publishLedger()
768  // waits one second between each attempt to read the ledger from the
769  // database
770  //
771  // In strict read-only mode, when the software fails to find a
772  // ledger in the database that has been validated by the network,
773  // the software will only try to publish subsequent ledgers once,
774  // until one of those ledgers is found in the database. Once the
775  // software successfully publishes a ledger, the software will fall
776  // back to the normal behavior of trying several times to publish
777  // the ledger that has been validated by the network. In this
778  // manner, a reporting process running in read-only mode does not
779  // need to restart if the database is wiped.
780  constexpr size_t timeoutSeconds = 10;
781  bool success = publishLedger(nextSequence, timeoutSeconds);
782  if (!success)
783  {
784  JLOG(journal_.warn())
785  << __func__ << " : "
786  << "Failed to publish ledger with sequence = " << nextSequence
787  << " . Beginning ETL";
788  // runETLPipeline returns the most recent sequence published,
789  // or an empty optional if no sequence was published
790  std::optional<uint32_t> lastPublished =
791  runETLPipeline(nextSequence);
792  JLOG(journal_.info()) << __func__ << " : "
793  << "Aborting ETL. Falling back to publishing";
794  // if no ledger was published, don't increment nextSequence
795  if (lastPublished)
796  nextSequence = *lastPublished + 1;
797  }
798  else
799  {
800  ++nextSequence;
801  }
802  }
803 }
804 
805 void
807 {
808  JLOG(journal_.debug()) << "Starting reporting in strict read only mode";
809  std::optional<uint32_t> mostRecent =
811  if (!mostRecent)
812  return;
813  uint32_t sequence = *mostRecent;
814  bool success = true;
815  while (!stopping_ &&
817  {
818  success = publishLedger(sequence, success ? 30 : 1);
819  ++sequence;
820  }
821 }
822 
823 void
825 {
826  worker_ = std::thread([this]() {
827  this_thread::set_name("ETL worker");
828  if (readOnly_)
829  monitorReadOnly();
830  else
831  monitor();
832  });
833 }
834 
836  : app_(app)
837  , journal_(app.journal("ReportingETL"))
838  , publishStrand_(app_.getIOService())
839  , loadBalancer_(*this)
840 {
841  // if present, get endpoint from config
842  if (app_.config().exists("reporting"))
843  {
844 #ifndef RIPPLED_REPORTING
845  Throw<std::runtime_error>(
846  "Config file specifies reporting, but software was not built with "
847  "-Dreporting=1. To use reporting, configure CMake with "
848  "-Dreporting=1");
849 #endif
850  if (!app_.config().useTxTables())
851  Throw<std::runtime_error>(
852  "Reporting requires tx tables. Set use_tx_tables=1 in config "
853  "file, under [ledger_tx_tables] section");
854  Section section = app_.config().section("reporting");
855 
856  JLOG(journal_.debug()) << "Parsing config info";
857 
858  auto& vals = section.values();
859  for (auto& v : vals)
860  {
861  JLOG(journal_.debug()) << "val is " << v;
862  Section source = app_.config().section(v);
863 
864  auto optIp = source.get("source_ip");
865  if (!optIp)
866  continue;
867 
868  auto optWsPort = source.get("source_ws_port");
869  if (!optWsPort)
870  continue;
871 
872  auto optGrpcPort = source.get("source_grpc_port");
873  if (!optGrpcPort)
874  {
875  // add source without grpc port
876  // used in read-only mode to detect when new ledgers have
877  // been validated. Used for publishing
878  if (app_.config().reportingReadOnly())
879  loadBalancer_.add(*optIp, *optWsPort);
880  continue;
881  }
882 
883  loadBalancer_.add(*optIp, *optWsPort, *optGrpcPort);
884  }
885 
886  // this is true iff --reportingReadOnly was passed via command line
888 
889  // if --reportingReadOnly was not passed via command line, check config
890  // file. Command line takes precedence
891  if (!readOnly_)
892  {
893  auto const optRO = section.get("read_only");
894  if (optRO)
895  {
896  readOnly_ = (*optRO == "true" || *optRO == "1");
898  }
899  }
900 
901  // lambda throws a useful message if string to integer conversion fails
902  auto asciiToIntThrows =
903  [](auto& dest, std::string const& src, char const* onError) {
904  char const* const srcEnd = src.data() + src.size();
905  auto [ptr, err] = std::from_chars(src.data(), srcEnd, dest);
906 
907  if (err == std::errc())
908  // skip whitespace at end of string
909  while (ptr != srcEnd &&
910  std::isspace(static_cast<unsigned char>(*ptr)))
911  ++ptr;
912 
913  // throw if
914  // o conversion error or
915  // o entire string is not consumed
916  if (err != std::errc() || ptr != srcEnd)
917  Throw<std::runtime_error>(onError + src);
918  };
919 
920  // handle command line arguments
921  if (app_.config().START_UP == Config::StartUpType::FRESH && !readOnly_)
922  {
923  asciiToIntThrows(
926  "Expected integral START_LEDGER command line argument. Got: ");
927  }
928  // if not passed via command line, check config for start sequence
929  if (!startSequence_)
930  {
931  auto const optStartSeq = section.get("start_sequence");
932  if (optStartSeq)
933  {
934  // set a value so we can dereference
935  startSequence_ = 0;
936  asciiToIntThrows(
938  *optStartSeq,
939  "Expected integral start_sequence config entry. Got: ");
940  }
941  }
942 
943  auto const optFlushInterval = section.get("flush_interval");
944  if (optFlushInterval)
945  asciiToIntThrows(
947  *optFlushInterval,
948  "Expected integral flush_interval config entry. Got: ");
949 
950  auto const optNumMarkers = section.get("num_markers");
951  if (optNumMarkers)
952  asciiToIntThrows(
953  numMarkers_,
954  *optNumMarkers,
955  "Expected integral num_markers config entry. Got: ");
956  }
957 }
958 
959 } // namespace ripple
ripple::NetworkOPs::pubLedger
virtual void pubLedger(std::shared_ptr< ReadView const > const &lpAccepted)=0
beast::Journal::fatal
Stream fatal() const
Definition: Journal.h:339
ripple::ReportingETL::flushInterval_
size_t flushInterval_
Used to determine when to write to the database during the initial ledger download.
Definition: ReportingETL.h:115
ripple::Section
Holds a collection of configuration values.
Definition: BasicConfig.h:42
ripple::Application
Definition: Application.h:116
ripple::Application::getNodeFamily
virtual Family & getNodeFamily()=0
ripple::HashPrefix::ledgerMaster
@ ledgerMaster
ledger master data for signing
std::this_thread::sleep_for
T sleep_for(T... args)
ripple::makeSlice
std::enable_if_t< std::is_same< T, char >::value||std::is_same< T, unsigned char >::value, Slice > makeSlice(std::array< T, N > const &a)
Definition: Slice.h:241
ripple::ReportingETL::fetchLedgerData
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedgerData(uint32_t sequence)
Extract data for a particular ledger from an ETL source.
Definition: ReportingETL.cpp:352
ripple::ReportingETL::loadInitialLedger
std::shared_ptr< Ledger > loadInitialLedger(uint32_t sequence)
Download a ledger with specified sequence in full, via GetLedgerData, and write the data to the datab...
Definition: ReportingETL.cpp:106
ripple::ReportingETL::startSequence_
std::optional< uint32_t > startSequence_
Ledger sequence to start ETL from.
Definition: ReportingETL.h:139
std::string
STL class.
std::shared_ptr< Ledger >
ripple::ThreadSafeQueue
Generic thread-safe queue with an optional maximum size. Note, we can't use a lockfree queue here,...
Definition: ETLHelpers.h:115
beast::Journal::trace
Stream trace() const
Severity stream access functions.
Definition: Journal.h:309
ripple::Serializer::modData
Blob & modData()
Definition: Serializer.h:179
std::pair
ripple::ReportingETL::setLastPublish
void setLastPublish()
Definition: ReportingETL.h:155
ripple::ReportingETL::insertTransactions
std::vector< AccountTransactionsData > insertTransactions(std::shared_ptr< Ledger > &ledger, org::xrpl::rpc::v1::GetLedgerResponse &data)
Insert all of the extracted transactions into the ledger.
Definition: ReportingETL.cpp:75
ripple::ThreadSafeQueue::push
void push(T const &elt)
Definition: ETLHelpers.h:136
charconv
ripple::ReportingETL::flushLedger
void flushLedger(std::shared_ptr< Ledger > &ledger)
Write all new data to the key-value store.
Definition: ReportingETL.cpp:181
ripple::hotACCOUNT_NODE
@ hotACCOUNT_NODE
Definition: NodeObject.h:35
std::vector
STL class.
ripple::ReportingETL::loadBalancer_
ETLLoadBalancer loadBalancer_
Mechanism for communicating with ETL sources.
Definition: ReportingETL.h:96
ripple::LedgerHeader::parentHash
uint256 parentHash
Definition: LedgerHeader.h:52
ripple::this_thread::set_name
void set_name(std::string s)
std::chrono::seconds
ripple::LedgerHeader::seq
LedgerIndex seq
Definition: LedgerHeader.h:41
ripple::ETLLoadBalancer::fetchLedger
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedger(uint32_t ledgerSequence, bool getObjects)
Fetch data for a specific ledger.
Definition: ETLSource.cpp:698
ripple::NetworkValidatedLedgers::waitUntilValidatedByNetwork
bool waitUntilValidatedByNetwork(uint32_t sequence)
Waits for the sequence to be validated by the network.
Definition: ETLHelpers.h:89
ripple::NodeStore::Database::sync
virtual void sync()=0
std::stringstream
STL class.
beast::Journal::warn
Stream warn() const
Definition: Journal.h:327
ripple::LedgerHeader::accountHash
uint256 accountHash
Definition: LedgerHeader.h:51
ripple::NodeStore::Database::store
virtual void store(NodeObjectType type, Blob &&data, uint256 const &hash, std::uint32_t ledgerSeq)=0
Store the object.
ripple::hotTRANSACTION_NODE
@ hotTRANSACTION_NODE
Definition: NodeObject.h:36
ripple::base_uint< 256 >::fromVoidChecked
static std::optional< base_uint > fromVoidChecked(T const &from)
Definition: base_uint.h:319
iostream
ripple::LedgerHeader::txHash
uint256 txHash
Definition: LedgerHeader.h:50
ripple::NetworkValidatedLedgers::getMostRecent
std::optional< uint32_t > getMostRecent() const
Get most recently validated sequence.
Definition: ETLHelpers.h:67
ripple::XRP_LEDGER_EARLIEST_FEES
static constexpr std::uint32_t XRP_LEDGER_EARLIEST_FEES
The XRP Ledger mainnet's earliest ledger with a FeeSettings object.
Definition: SystemParameters.h:73
ripple::Application::getOPs
virtual NetworkOPs & getOPs()=0
ripple::Section::values
std::vector< std::string > const & values() const
Returns all the values in the section.
Definition: BasicConfig.h:77
ripple::ReportingETL::networkValidatedLedgers_
NetworkValidatedLedgers networkValidatedLedgers_
Mechanism for detecting when the network has validated a new ledger.
Definition: ReportingETL.h:100
ripple::TxMeta
Definition: TxMeta.h:32
ripple::LedgerHeader::hash
uint256 hash
Definition: LedgerHeader.h:49
ripple::base_uint< 256 >
ripple::ReportingETL::publishLedger
bool publishLedger(uint32_t ledgerSequence, uint32_t maxAttempts=10)
Attempt to read the specified ledger from the database, and then publish that ledger to the ledgers s...
Definition: ReportingETL.cpp:285
ripple::ReportingETL::journal_
beast::Journal journal_
Definition: ReportingETL.h:75
ripple::Config::reporting
bool reporting() const
Definition: Config.h:338
std::thread
STL class.
ripple::Application::getLedgerMaster
virtual LedgerMaster & getLedgerMaster()=0
ripple::ReportingETL::writing_
std::atomic_bool writing_
Whether the process is writing to the database. Used by server_info.
Definition: ReportingETL.h:134
ripple::ReportingETL::readOnly_
bool readOnly_
Whether the process is in strict read-only mode.
Definition: ReportingETL.h:131
ripple::ReportingETL::numMarkers_
size_t numMarkers_
This variable controls the number of GetLedgerData calls that will be executed in parallel during the...
Definition: ReportingETL.h:126
ripple::ETLLoadBalancer::loadInitialLedger
void loadInitialLedger(uint32_t sequence, ThreadSafeQueue< std::shared_ptr< SLE >> &writeQueue)
Load the initial ledger, writing data to the queue.
Definition: ETLSource.cpp:679
ripple::Application::config
virtual Config & config()=0
ripple::Application::getRelationalDatabase
virtual RelationalDatabase & getRelationalDatabase()=0
ripple::Config::useTxTables
bool useTxTables() const
Definition: Config.h:344
ripple::ReportingETL::monitorReadOnly
void monitorReadOnly()
Monitor the database for newly written ledgers.
Definition: ReportingETL.cpp:806
ripple::detail::toString
std::string toString(LedgerInfo const &info)
Convenience function for printing out basic ledger info.
Definition: ReportingETL.cpp:41
beast::Journal::error
Stream error() const
Definition: Journal.h:333
beast::Journal::info
Stream info() const
Definition: Journal.h:321
std::chrono::time_point
ripple::ReportingETL::publishStrand_
boost::asio::io_context::strand publishStrand_
Strand to ensure that ledgers are published in order.
Definition: ReportingETL.h:91
ripple::STTx
Definition: STTx.h:45
ripple::LedgerMaster::getLedgerBySeq
std::shared_ptr< Ledger const > getLedgerBySeq(std::uint32_t index)
Definition: LedgerMaster.cpp:1823
std::errc
std::runtime_error
STL class.
ripple::SerialIter
Definition: Serializer.h:311
ripple::Config::START_UP
StartUpType START_UP
Definition: Config.h:154
std::atomic_bool
std::from_chars
T from_chars(T... args)
ripple::ReportingETL::fetchLedgerDataAndDiff
std::optional< org::xrpl::rpc::v1::GetLedgerResponse > fetchLedgerDataAndDiff(uint32_t sequence)
Extract data for a particular ledger from an ETL source.
Definition: ReportingETL.cpp:366
ripple::LedgerHeader
Information about the notional ledger backing the view.
Definition: LedgerHeader.h:33
ripple::Serializer
Definition: Serializer.h:40
ripple::Config::setReportingReadOnly
void setReportingReadOnly(bool b)
Definition: Config.h:356
std::vector::emplace_back
T emplace_back(T... args)
ripple
Use hash_* containers for keys that do not need a cryptographically secure hashing algorithm.
Definition: RCLCensorshipDetector.h:29
ripple::ReportingETL::stopping_
std::atomic_bool stopping_
Whether the software is stopping.
Definition: ReportingETL.h:103
ripple::Application::getNodeStore
virtual NodeStore::Database & getNodeStore()=0
ripple::addRaw
void addRaw(LedgerHeader const &info, Serializer &s, bool includeHash)
Definition: protocol/impl/LedgerHeader.cpp:25
cstdlib
ripple::PostgresDatabase::writeLedgerAndTransactions
virtual bool writeLedgerAndTransactions(LedgerInfo const &info, std::vector< AccountTransactionsData > const &accountTxData)=0
writeLedgerAndTransactions: Writes new ledger and transaction data into the database.
ripple::deserializeHeader
LedgerHeader deserializeHeader(Slice data, bool hasHash)
Deserialize a ledger header from a byte array.
Definition: protocol/impl/LedgerHeader.cpp:42
ripple::PostgresDatabase
Definition: PostgresDatabase.h:27
ripple::LedgerMaster::getValidatedLedger
std::shared_ptr< Ledger const > getValidatedLedger()
Definition: LedgerMaster.cpp:1669
ripple::Section::get
std::optional< T > get(std::string const &name) const
Definition: BasicConfig.h:138
cctype
ripple::ReportingETL::app_
Application & app_
Definition: ReportingETL.h:73
std::optional
std::stringstream::str
T str(T... args)
beast::Journal::debug
Stream debug() const
Definition: Journal.h:315
ripple::ReportingETL::start
void start()
Start all of the necessary components and begin ETL.
Definition: ReportingETL.h:326
ripple::keylet::fees
Keylet const & fees() noexcept
The (fixed) index of the object containing the ledger fees.
Definition: Indexes.cpp:172
ripple::ReportingETL::worker_
std::thread worker_
Definition: ReportingETL.h:77
ripple::hotLEDGER
@ hotLEDGER
Definition: NodeObject.h:34
std::make_pair
T make_pair(T... args)
ripple::Serializer::add32
int add32(std::uint32_t i)
Definition: Serializer.cpp:38
ripple::strHex
std::string strHex(FwdIt begin, FwdIt end)
Definition: strHex.h:30
ripple::Config::reportingReadOnly
bool reportingReadOnly() const
Definition: Config.h:350
ripple::ReportingETL::buildNextLedger
std::pair< std::shared_ptr< Ledger >, std::vector< AccountTransactionsData > > buildNextLedger(std::shared_ptr< Ledger > &parent, org::xrpl::rpc::v1::GetLedgerResponse &rawData)
Build the next ledger using the previous ledger and the extracted data.
Definition: ReportingETL.cpp:380
ripple::ReportingETL::monitor
void monitor()
Monitor the network for newly validated ledgers.
Definition: ReportingETL.cpp:686
ripple::ReportingETL::ReportingETL
ReportingETL(Application &app)
Definition: ReportingETL.cpp:835
ripple::Config::START_LEDGER
std::string START_LEDGER
Definition: Config.h:158
ripple::ReportingETL::isStopping
bool isStopping() const
Definition: ReportingETL.h:282
ripple::ReportingETL::consumeLedgerData
void consumeLedgerData(std::shared_ptr< Ledger > &ledger, ThreadSafeQueue< std::shared_ptr< SLE >> &writeQueue)
Consume data from a queue and insert that data into the ledger. This function will continue to pull fr...
Definition: ReportingETL.cpp:53
ripple::ReportingETL::doWork
void doWork()
Definition: ReportingETL.cpp:824
ripple::ReportingETL::runETLPipeline
std::optional< uint32_t > runETLPipeline(uint32_t startSequence)
Run ETL.
Definition: ReportingETL.cpp:466
ripple::BasicConfig::exists
bool exists(std::string const &name) const
Returns true if a section with the given name exists.
Definition: BasicConfig.cpp:121
ripple::BasicConfig::section
Section & section(std::string const &name)
Returns the section with the given name.
Definition: BasicConfig.cpp:127
variant
string
ripple::ETLLoadBalancer::add
void add(std::string &host, std::string &websocketPort, std::string &grpcPort)
Add an ETL source.
Definition: ETLSource.cpp:656
std::chrono::system_clock::now
T now(T... args)